/*
 * Copyright (c) 1989, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Poul-Henning Kamp of the FreeBSD Project.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_cache.c	8.5 (Berkeley) 3/22/95
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/syscallsubr.h>
#include <sys/sysproto.h>
#include <sys/proc.h>
#include <sys/filedesc.h>
#include <sys/fnv_hash.h>

/*
 * This structure describes the elements in the cache of recent
 * names looked up by namei.
 */

struct namecache {
	LIST_ENTRY(namecache) nc_hash;	/* hash chain */
	LIST_ENTRY(namecache) nc_src;	/* source vnode list */
	TAILQ_ENTRY(namecache) nc_dst;	/* destination vnode list */
	struct vnode *nc_dvp;		/* vnode of parent of name */
	struct vnode *nc_vp;		/* vnode the name refers to */
	u_char	nc_flag;		/* flag bits */
	u_char	nc_nlen;		/* length of name */
	char	nc_name[0];		/* segment name */
};
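/*
 * nc_name[0] is a (pre-C99) flexible array member: cache_enter() below
 * allocates each entry with room for the component name appended,
 *
 *	malloc(sizeof *ncp + cnp->cn_namelen, M_VFSCACHE, M_WAITOK);
 *
 * so the name is stored inline and needs no separate allocation or free.
 */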
/*
 * Name caching works as follows:
 *
 * Names found by directory scans are retained in a cache
 * for future reference.  It is managed LRU, so frequently
 * used names will hang around.  The cache is indexed by hash value
 * obtained from (vp, name), where vp refers to the directory
 * containing name.
 *
 * If it is a "negative" entry, (i.e. for a name that is known NOT to
 * exist) the vnode pointer will be NULL.
 *
 * Upon reaching the last segment of a path, if the reference
 * is for DELETE, or NOCACHE is set (rewrite), and the
 * name is located in the cache, it will be dropped.
 */

/*
 * Structures associated with name caching.
 */
#define NCHHASH(hash) \
	(&nchashtbl[(hash) & nchash])
static LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
static TAILQ_HEAD(, namecache) ncneg;	/* LRU list of negative entries */
static u_long	nchash;			/* size of hash table */
SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");
static u_long	ncnegfactor = 16;	/* ratio of negative entries */
SYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");
static u_long	numneg;			/* number of negative entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");
static u_long	numcache;		/* number of cache entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");
static u_long	numcachehv;		/* number of cache entries with vnodes held */
SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, "");
#if 0
static u_long	numcachepl;		/* number of cache purges for leaf entries */
SYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, "");
#endif
struct nchstats	nchstats;		/* cache effectiveness statistics */

static int	doingcache = 1;		/* 1 => enable the cache */
SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "");

/* Export size information to userland */
SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), "");
SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), "");

/*
 * The new name cache statistics
 */
SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics");
#define STATNODE(mode, name, var) \
	SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
STATNODE(CTLFLAG_RD, numneg, &numneg);
STATNODE(CTLFLAG_RD, numcache, &numcache);
static u_long	numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls);
static u_long	dothits; STATNODE(CTLFLAG_RD, dothits, &dothits);
static u_long	dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits);
static u_long	numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks);
static u_long	nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss);
static u_long	nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap);
static u_long	numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps);
static u_long	numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits);
static u_long	numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps);
static u_long	numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);

SYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD, &nchstats,
	sizeof(nchstats), "LU", "VFS cache effectiveness statistics");

static void cache_zap(struct namecache *ncp);

static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");

/*
 * Flags in namecache.nc_flag
 */
#define NCF_WHITE	1
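/*
 * Lookups hash the component name with FNV-1 and then fold in the parent
 * directory's v_id, so the same name under different directories lands
 * on (usually) different chains:
 *
 *	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
 *	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
 *	ncpp = NCHHASH(hash);	(i.e., &nchashtbl[hash & nchash])
 *
 * hashinit() (see nchinit() below) sizes the table to a power of two and
 * stores size - 1 in nchash, so the mask in NCHHASH always selects a
 * valid bucket.
 */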
/*
 * Grab an atomic snapshot of the name cache hash chain lengths
 */
SYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats");

static int
sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
{
	int error;
	struct nchashhead *ncpp;
	struct namecache *ncp;
	int n_nchash;
	int count;

	n_nchash = nchash + 1;	/* nchash is max index, not count */
	if (!req->oldptr)
		return SYSCTL_OUT(req, 0, n_nchash * sizeof(int));

	/* Scan hash tables for applicable entries */
	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
		count = 0;
		LIST_FOREACH(ncp, ncpp, nc_hash) {
			count++;
		}
		error = SYSCTL_OUT(req, &count, sizeof(count));
		if (error)
			return (error);
	}
	return (0);
}
SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_debug_hashstat_rawnchash, "S,int", "nchash chain lengths");

static int
sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
{
	int error;
	struct nchashhead *ncpp;
	struct namecache *ncp;
	int n_nchash;
	int count, maxlength, used, pct;

	if (!req->oldptr)
		return SYSCTL_OUT(req, 0, 4 * sizeof(int));

	n_nchash = nchash + 1;	/* nchash is max index, not count */
	used = 0;
	maxlength = 0;

	/* Scan hash tables for applicable entries */
	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
		count = 0;
		LIST_FOREACH(ncp, ncpp, nc_hash) {
			count++;
		}
		if (count)
			used++;
		if (maxlength < count)
			maxlength = count;
	}
	n_nchash = nchash + 1;
	pct = (used * 100 * 100) / n_nchash;
	error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash));
	if (error)
		return (error);
	error = SYSCTL_OUT(req, &used, sizeof(used));
	if (error)
		return (error);
	error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength));
	if (error)
		return (error);
	error = SYSCTL_OUT(req, &pct, sizeof(pct));
	if (error)
		return (error);
	return (0);
}
SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_debug_hashstat_nchash, "I", "nchash chain lengths");
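/*
 * The summary handler above exports four ints: table size, buckets in
 * use, longest chain, and bucket utilization as a percentage scaled by
 * 100 (two implied decimal places).  It is readable from userland as
 * debug.hashstat.nchash; debug.hashstat.rawnchash returns the raw
 * per-bucket chain lengths instead.
 */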
/*
 * Delete an entry from its hash list, remove it from its source and
 * destination vnode lists, and free it.
 */
static void
cache_zap(ncp)
	struct namecache *ncp;
{
	LIST_REMOVE(ncp, nc_hash);
	LIST_REMOVE(ncp, nc_src);
	if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
		vdrop(ncp->nc_dvp);
		numcachehv--;
	}
	if (ncp->nc_vp) {
		TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
	} else {
		TAILQ_REMOVE(&ncneg, ncp, nc_dst);
		numneg--;
	}
	numcache--;
	free(ncp, M_VFSCACHE);
}

/*
 * cache_leaf_test()
 *
 * Test whether this (directory) vnode's namei cache entry contains
 * subdirectories or not.  Used to determine whether the directory is
 * a leaf in the namei cache or not.  Note: the directory may still
 * contain files in the namei cache.
 *
 * Returns 0 if the directory is a leaf, -1 if it isn't.
 */
int
cache_leaf_test(struct vnode *vp)
{
	struct namecache *ncpc;

	for (ncpc = LIST_FIRST(&vp->v_cache_src);
	     ncpc != NULL;
	     ncpc = LIST_NEXT(ncpc, nc_src)) {
		if (ncpc->nc_vp != NULL && ncpc->nc_vp->v_type == VDIR)
			return (-1);
	}
	return (0);
}

/*
 * Lookup an entry in the cache
 *
 * Lookup is called with dvp pointing to the directory to search,
 * cnp pointing to the name of the entry being sought.  If the lookup
 * succeeds, the vnode is returned in *vpp, and a status of -1 is
 * returned.  If the lookup determines that the name does not exist
 * (negative caching), a status of ENOENT is returned.  If the lookup
 * fails, a status of zero is returned.
 */

int
cache_lookup(dvp, vpp, cnp)
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
{
	struct namecache *ncp;
	u_int32_t hash;

	if (!doingcache) {
		cnp->cn_flags &= ~MAKEENTRY;
		return (0);
	}

	numcalls++;

	if (cnp->cn_nameptr[0] == '.') {
		if (cnp->cn_namelen == 1) {
			*vpp = dvp;
			dothits++;
			return (-1);
		}
		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
			dotdothits++;
			if (dvp->v_dd->v_id != dvp->v_ddid ||
			    (cnp->cn_flags & MAKEENTRY) == 0) {
				dvp->v_ddid = 0;
				return (0);
			}
			*vpp = dvp->v_dd;
			return (-1);
		}
	}

	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
		numchecks++;
		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
			break;
	}

	/* We failed to find an entry */
	if (ncp == 0) {
		if ((cnp->cn_flags & MAKEENTRY) == 0) {
			nummisszap++;
		} else {
			nummiss++;
		}
		nchstats.ncs_miss++;
		return (0);
	}

	/* We don't want to have an entry, so dump it */
	if ((cnp->cn_flags & MAKEENTRY) == 0) {
		numposzaps++;
		nchstats.ncs_badhits++;
		cache_zap(ncp);
		return (0);
	}

	/* We found a "positive" match, return the vnode */
	if (ncp->nc_vp) {
		numposhits++;
		nchstats.ncs_goodhits++;
		*vpp = ncp->nc_vp;
		return (-1);
	}

	/* We found a negative match, and want to create it, so purge */
	if (cnp->cn_nameiop == CREATE) {
		numnegzaps++;
		nchstats.ncs_badhits++;
		cache_zap(ncp);
		return (0);
	}

	numneghits++;
	/*
	 * We found a "negative" match, ENOENT notifies client of this match.
	 * The nc_flag field records whether this is a whiteout.
	 */
	TAILQ_REMOVE(&ncneg, ncp, nc_dst);
	TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
	nchstats.ncs_neghits++;
	if (ncp->nc_flag & NCF_WHITE)
		cnp->cn_flags |= ISWHITEOUT;
	return (ENOENT);
}
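/*
 * A negative hit above is re-queued at the tail of ncneg, and
 * cache_enter() reclaims from the head once numneg exceeds
 * numcache / ncnegfactor, so ncneg ages as an LRU of negative entries
 * and repeatedly-hit negatives survive the longest.
 */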
/*
 * Add an entry to the cache.
 */
void
cache_enter(dvp, vp, cnp)
	struct vnode *dvp;
	struct vnode *vp;
	struct componentname *cnp;
{
	struct namecache *ncp;
	struct nchashhead *ncpp;
	u_int32_t hash;
	int len;

	if (!doingcache)
		return;

	if (cnp->cn_nameptr[0] == '.') {
		if (cnp->cn_namelen == 1) {
			return;
		}
		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
			if (vp) {
				dvp->v_dd = vp;
				dvp->v_ddid = vp->v_id;
			} else {
				dvp->v_dd = dvp;
				dvp->v_ddid = 0;
			}
			return;
		}
	}

	ncp = (struct namecache *)
		malloc(sizeof *ncp + cnp->cn_namelen, M_VFSCACHE, M_WAITOK);
	bzero((char *)ncp, sizeof *ncp);
	numcache++;
	if (!vp) {
		numneg++;
		ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0;
	} else if (vp->v_type == VDIR) {
		vp->v_dd = dvp;
		vp->v_ddid = dvp->v_id;
	}

	/*
	 * Fill in cache info, if vp is NULL this is a "negative" cache entry.
	 * For negative entries, we have to record whether it is a whiteout;
	 * the whiteout flag is stored in the nc_flag field.
	 */
	ncp->nc_vp = vp;
	ncp->nc_dvp = dvp;
	len = ncp->nc_nlen = cnp->cn_namelen;
	hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
	bcopy(cnp->cn_nameptr, ncp->nc_name, len);
	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
	ncpp = NCHHASH(hash);
	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
	if (LIST_EMPTY(&dvp->v_cache_src)) {
		vhold(dvp);
		numcachehv++;
	}
	LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
	if (vp) {
		TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
	} else {
		TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
	}
	if (numneg * ncnegfactor > numcache) {
		ncp = TAILQ_FIRST(&ncneg);
		cache_zap(ncp);
	}
}
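/*
 * The vhold() above pairs with the vdrop() in cache_zap(): a directory
 * is held as long as it sources at least one cache entry, which keeps
 * it from being recycled while names beneath it are still cached.
 * numcachehv counts the directories so held.
 */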
/*
 * Name cache initialization, from vfs_init() when we are booting
 */
static void
nchinit(void *dummy __unused)
{

	TAILQ_INIT(&ncneg);
	nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
}
SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL)

/*
 * Invalidate all entries to a particular vnode.
 *
 * Remove all entries in the namecache relating to this vnode and
 * change the v_id.  We take the v_id from a global counter, since
 * it becomes a handy sequence number in crash-dumps that way.
 * No valid vnode will ever have (v_id == 0).
 *
 * XXX: Only time and the size of v_id prevents this from failing:
 * XXX: In theory we should hunt down all (struct vnode*, v_id)
 * XXX: soft references and nuke them, at least on the global
 * XXX: v_id wraparound.  The period of resistance can be extended
 * XXX: by incrementing each vnode's v_id individually instead of
 * XXX: using the global v_id.
 */

void
cache_purge(vp)
	struct vnode *vp;
{
	static u_long nextid;

	while (!LIST_EMPTY(&vp->v_cache_src))
		cache_zap(LIST_FIRST(&vp->v_cache_src));
	while (!TAILQ_EMPTY(&vp->v_cache_dst))
		cache_zap(TAILQ_FIRST(&vp->v_cache_dst));

	do
		nextid++;
	while (nextid == vp->v_id || !nextid);
	vp->v_id = nextid;
	vp->v_dd = vp;
	vp->v_ddid = 0;
}
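/*
 * Because no valid vnode ever has v_id == 0, setting v_ddid to 0 above
 * guarantees that a stale cached ".." reference through this vnode
 * fails the v_dd->v_id == v_ddid check in cache_lookup() and falls
 * back to a real directory lookup.
 */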
/*
 * Flush all entries referencing a particular filesystem.
 *
 * Since we need to check it anyway, we will flush all the invalid
 * entries at the same time.
 */
void
cache_purgevfs(mp)
	struct mount *mp;
{
	struct nchashhead *ncpp;
	struct namecache *ncp, *nnp;

	/* Scan hash tables for applicable entries */
	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
		for (ncp = LIST_FIRST(ncpp); ncp != 0; ncp = nnp) {
			nnp = LIST_NEXT(ncp, nc_hash);
			if (ncp->nc_dvp->v_mount == mp) {
				cache_zap(ncp);
			}
		}
	}
}

/*
 * Perform canonical checks and cache lookup and pass on to filesystem
 * through the vop_cachedlookup only if needed.
 */

int
vfs_cache_lookup(ap)
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{
	struct vnode *dvp, *vp;
	int lockparent;
	int error;
	struct vnode **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	struct ucred *cred = cnp->cn_cred;
	int flags = cnp->cn_flags;
	struct thread *td = cnp->cn_thread;
	u_long vpid;	/* capability number of vnode */

	*vpp = NULL;
	dvp = ap->a_dvp;
	lockparent = flags & LOCKPARENT;

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);

	error = VOP_ACCESS(dvp, VEXEC, cred, td);

	if (error)
		return (error);

	error = cache_lookup(dvp, vpp, cnp);

#ifdef LOOKUP_SHARED
	if (!error) {
		/*
		 * We do this because the rest of the system now expects to
		 * get a shared lock, which is later upgraded if LOCKSHARED
		 * is not set.  We have so many cases here because of bugs
		 * that yield inconsistent lock states.  This all badly
		 * needs to be fixed.
		 */
		error = VOP_CACHEDLOOKUP(dvp, vpp, cnp);
		if (!error) {
			int flock;

			flock = VOP_ISLOCKED(*vpp, td);
			if (flock != LK_EXCLUSIVE) {
				if (flock == 0) {
					if ((flags & ISLASTCN) &&
					    (flags & LOCKSHARED))
						VOP_LOCK(*vpp, LK_SHARED, td);
					else
						VOP_LOCK(*vpp, LK_EXCLUSIVE, td);
				}
			} else if ((flags & ISLASTCN) && (flags & LOCKSHARED))
				VOP_LOCK(*vpp, LK_DOWNGRADE, td);
		}
		return (error);
	}
#else
	if (!error)
		return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
#endif

	if (error == ENOENT)
		return (error);

	vp = *vpp;
	vpid = vp->v_id;
	cnp->cn_flags &= ~PDIRUNLOCK;
	if (dvp == vp) {	/* lookup on "." */
		VREF(vp);
		error = 0;
	} else if (flags & ISDOTDOT) {
		VOP_UNLOCK(dvp, 0, td);
		cnp->cn_flags |= PDIRUNLOCK;
#ifdef LOOKUP_SHARED
		if ((flags & ISLASTCN) && (flags & LOCKSHARED))
			error = vget(vp, LK_SHARED, td);
		else
			error = vget(vp, LK_EXCLUSIVE, td);
#else
		error = vget(vp, LK_EXCLUSIVE, td);
#endif

		if (!error && lockparent && (flags & ISLASTCN)) {
			if ((error = vn_lock(dvp, LK_EXCLUSIVE, td)) == 0)
				cnp->cn_flags &= ~PDIRUNLOCK;
		}
	} else {
#ifdef LOOKUP_SHARED
		if ((flags & ISLASTCN) && (flags & LOCKSHARED))
			error = vget(vp, LK_SHARED, td);
		else
			error = vget(vp, LK_EXCLUSIVE, td);
#else
		error = vget(vp, LK_EXCLUSIVE, td);
#endif
		if (!lockparent || error || !(flags & ISLASTCN)) {
			VOP_UNLOCK(dvp, 0, td);
			cnp->cn_flags |= PDIRUNLOCK;
		}
	}
	/*
	 * Check that the capability number did not change
	 * while we were waiting for the lock.
	 */
	if (!error) {
		if (vpid == vp->v_id)
			return (0);
		vput(vp);
		if (lockparent && dvp != vp && (flags & ISLASTCN)) {
			VOP_UNLOCK(dvp, 0, td);
			cnp->cn_flags |= PDIRUNLOCK;
		}
	}
	if (cnp->cn_flags & PDIRUNLOCK) {
		error = vn_lock(dvp, LK_EXCLUSIVE, td);
		if (error)
			return (error);
		cnp->cn_flags &= ~PDIRUNLOCK;
	}
#ifdef LOOKUP_SHARED
	error = VOP_CACHEDLOOKUP(dvp, vpp, cnp);

	if (!error) {
		int flock = 0;

		flock = VOP_ISLOCKED(*vpp, td);
		if (flock != LK_EXCLUSIVE) {
			if (flock == 0) {
				if ((flags & ISLASTCN) && (flags & LOCKSHARED))
					VOP_LOCK(*vpp, LK_SHARED, td);
				else
					VOP_LOCK(*vpp, LK_EXCLUSIVE, td);
			}
		} else if ((flags & ISLASTCN) && (flags & LOCKSHARED))
			VOP_LOCK(*vpp, LK_DOWNGRADE, td);
	}

	return (error);
#else
	return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
#endif
}

#ifndef _SYS_SYSPROTO_H_
struct __getcwd_args {
	u_char	*buf;
	u_int	buflen;
};
#endif

/*
 * XXX All of these sysctls would probably be more productive dead.
 */
static int disablecwd;
SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0,
	"Disable the getcwd syscall");

/* Various statistics for the getcwd syscall */
static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls);
static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1);
static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2);
static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3);
static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4);
static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound);

/* Implementation of the getcwd syscall */
int
__getcwd(td, uap)
	struct thread *td;
	struct __getcwd_args *uap;
{

	return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen));
}

int
kern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg, u_int buflen)
{
	char *bp, *tmpbuf;
	int error, i, slash_prefixed;
	struct filedesc *fdp;
	struct namecache *ncp;
	struct vnode *vp;

	numcwdcalls++;
	if (disablecwd)
		return (ENODEV);
	if (buflen < 2)
		return (EINVAL);
	if (buflen > MAXPATHLEN)
		buflen = MAXPATHLEN;
	error = 0;
	tmpbuf = bp = malloc(buflen, M_TEMP, M_WAITOK);
	bp += buflen - 1;
	*bp = '\0';
	fdp = td->td_proc->p_fd;
	slash_prefixed = 0;
	FILEDESC_LOCK(fdp);
	mp_fixme("No vnode locking done!");
	for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) {
		if (vp->v_vflag & VV_ROOT) {
			if (vp->v_mount == NULL) {	/* forced unmount */
				FILEDESC_UNLOCK(fdp);
				free(tmpbuf, M_TEMP);
				return (EBADF);
			}
			vp = vp->v_mount->mnt_vnodecovered;
			continue;
		}
		if (vp->v_dd->v_id != vp->v_ddid) {
			FILEDESC_UNLOCK(fdp);
			numcwdfail1++;
			free(tmpbuf, M_TEMP);
			return (ENOTDIR);
		}
		ncp = TAILQ_FIRST(&vp->v_cache_dst);
		if (!ncp) {
			FILEDESC_UNLOCK(fdp);
			numcwdfail2++;
			free(tmpbuf, M_TEMP);
			return (ENOENT);
		}
		if (ncp->nc_dvp != vp->v_dd) {
			FILEDESC_UNLOCK(fdp);
			numcwdfail3++;
			free(tmpbuf, M_TEMP);
			return (EBADF);
		}
		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
			if (bp == tmpbuf) {
				FILEDESC_UNLOCK(fdp);
				numcwdfail4++;
				free(tmpbuf, M_TEMP);
				return (ENOMEM);
			}
			*--bp = ncp->nc_name[i];
		}
		if (bp == tmpbuf) {
			FILEDESC_UNLOCK(fdp);
			numcwdfail4++;
			free(tmpbuf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
		slash_prefixed = 1;
		vp = vp->v_dd;
	}
	FILEDESC_UNLOCK(fdp);
	if (!slash_prefixed) {
		if (bp == tmpbuf) {
			numcwdfail4++;
			free(tmpbuf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
	}
	numcwdfound++;
	if (bufseg == UIO_SYSSPACE)
		bcopy(bp, buf, strlen(bp) + 1);
	else
		error = copyout(bp, buf, strlen(bp) + 1);
	free(tmpbuf, M_TEMP);
	return (error);
}
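/*
 * kern___getcwd() builds the path backwards: component names are copied
 * in reverse into the tail of a scratch buffer, each prefixed with '/',
 * while the loop climbs toward the root via v_dd.  Mount points are
 * crossed by stepping to v_mount->mnt_vnodecovered, and the walk relies
 * entirely on the name cache; a missing or stale entry aborts with
 * ENOENT or EBADF rather than scanning directories on disk.
 */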
/*
 * Thus begins the fullpath magic.
 */

#undef STATNODE
#define STATNODE(name)						\
	static u_int name;					\
	SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "")

static int disablefullpath;
SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0,
	"Disable the vn_fullpath function");

STATNODE(numfullpathcalls);
STATNODE(numfullpathfail1);
STATNODE(numfullpathfail2);
STATNODE(numfullpathfail3);
STATNODE(numfullpathfail4);
STATNODE(numfullpathfound);

/*
 * Retrieve the full filesystem path that corresponds to a vnode from the
 * name cache (if available).
 */
int
vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf)
{
	char *bp, *buf;
	int i, slash_prefixed;
	struct filedesc *fdp;
	struct namecache *ncp;
	struct vnode *vp;

	numfullpathcalls++;
	if (disablefullpath)
		return (ENODEV);
	if (vn == NULL)
		return (EINVAL);
	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
	bp = buf + MAXPATHLEN - 1;
	*bp = '\0';
	fdp = td->td_proc->p_fd;
	slash_prefixed = 0;
	FILEDESC_LOCK(fdp);
	for (vp = vn; vp != fdp->fd_rdir && vp != rootvnode;) {
		ASSERT_VOP_LOCKED(vp, "vn_fullpath");
		if (vp->v_vflag & VV_ROOT) {
			if (vp->v_mount == NULL) {	/* forced unmount */
				FILEDESC_UNLOCK(fdp);
				free(buf, M_TEMP);
				return (EBADF);
			}
			vp = vp->v_mount->mnt_vnodecovered;
			continue;
		}
		if (vp != vn && vp->v_dd->v_id != vp->v_ddid) {
			FILEDESC_UNLOCK(fdp);
			numfullpathfail1++;
			free(buf, M_TEMP);
			return (ENOTDIR);
		}
		ncp = TAILQ_FIRST(&vp->v_cache_dst);
		if (!ncp) {
			FILEDESC_UNLOCK(fdp);
			numfullpathfail2++;
			free(buf, M_TEMP);
			return (ENOENT);
		}
		if (vp != vn && ncp->nc_dvp != vp->v_dd) {
			FILEDESC_UNLOCK(fdp);
			numfullpathfail3++;
			free(buf, M_TEMP);
			return (EBADF);
		}
		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
			if (bp == buf) {
				FILEDESC_UNLOCK(fdp);
				numfullpathfail4++;
				free(buf, M_TEMP);
				return (ENOMEM);
			}
			*--bp = ncp->nc_name[i];
		}
		if (bp == buf) {
			FILEDESC_UNLOCK(fdp);
			numfullpathfail4++;
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
		slash_prefixed = 1;
		vp = ncp->nc_dvp;
	}
	if (!slash_prefixed) {
		if (bp == buf) {
			FILEDESC_UNLOCK(fdp);
			numfullpathfail4++;
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
	}
	FILEDESC_UNLOCK(fdp);
	numfullpathfound++;
	*retbuf = bp;
	*freebuf = buf;
	return (0);
}
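/*
 * A sketch of a typical (hypothetical) caller: on success, retbuf points
 * into the middle of the allocated buffer, so it is freebuf, not retbuf,
 * that must be handed back to free():
 *
 *	char *fullpath, *freebuf;
 *
 *	if (vn_fullpath(td, vp, &fullpath, &freebuf) == 0) {
 *		printf("path: %s\n", fullpath);
 *		free(freebuf, M_TEMP);
 *	}
 */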