/*-
 * Copyright (c) 1989, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Poul-Henning Kamp of the FreeBSD Project.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_cache.c	8.5 (Berkeley) 3/22/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/filedesc.h>
#include <sys/fnv_hash.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
#include <sys/vnode.h>

#include <vm/uma.h>

/*
 * This structure describes the elements in the cache of recent
 * names looked up by namei.
 */

struct namecache {
	LIST_ENTRY(namecache) nc_hash;	/* hash chain */
	LIST_ENTRY(namecache) nc_src;	/* source vnode list */
	TAILQ_ENTRY(namecache) nc_dst;	/* destination vnode list */
	struct vnode *nc_dvp;		/* vnode of parent of name */
	struct vnode *nc_vp;		/* vnode the name refers to */
	u_char nc_flag;			/* flag bits */
	u_char nc_nlen;			/* length of name */
	char nc_name[0];		/* segment name */
};

/*
 * Name caching works as follows:
 *
 * Names found by directory scans are retained in a cache
 * for future reference.  It is managed LRU, so frequently
 * used names will hang around.  The cache is indexed by a hash value
 * obtained from (vp, name), where vp refers to the directory
 * containing name.
 *
 * If it is a "negative" entry (i.e. for a name that is known NOT to
 * exist), the vnode pointer will be NULL.
 *
 * Upon reaching the last segment of a path, if the reference
 * is for DELETE, or NOCACHE is set (rewrite), and the
 * name is located in the cache, it will be dropped.
 */

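/*
 * Illustrative sketch (not used by the code itself): the hash key for a
 * lookup of "name" in directory dvp is derived from both the name and the
 * directory vnode pointer, and selects a bucket in nchashtbl:
 *
 *	hash = fnv_32_buf(name, namelen, FNV1_32_INIT);
 *	hash = fnv_32_buf(&dvp, sizeof(dvp), hash);
 *	bucket = NCHHASH(hash);		// i.e. &nchashtbl[hash & nchash]
 *
 * This matches the computation performed in cache_lookup() and
 * cache_enter() below.
 */
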
/*
 * Structures associated with name caching.
 */
#define NCHHASH(hash) \
	(&nchashtbl[(hash) & nchash])
static LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
static TAILQ_HEAD(, namecache) ncneg;	/* Negative cache LRU queue */
static u_long	nchash;			/* size of hash table */
SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");
static u_long	ncnegfactor = 16;	/* ratio of negative entries */
SYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");
static u_long	numneg;			/* number of negative entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");
static u_long	numcache;		/* number of cache entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");
static u_long	numcachehv;		/* number of cache entries with vnodes held */
SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, "");
#if 0
static u_long	numcachepl;		/* number of cache purges for leaf entries */
SYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, "");
#endif
struct nchstats	nchstats;		/* cache effectiveness statistics */

static struct rwlock cache_lock;
RW_SYSINIT(vfscache, &cache_lock, "Name Cache");

#define	CACHE_UPGRADE_LOCK()	rw_try_upgrade(&cache_lock)
#define	CACHE_RLOCK()		rw_rlock(&cache_lock)
#define	CACHE_RUNLOCK()		rw_runlock(&cache_lock)
#define	CACHE_WLOCK()		rw_wlock(&cache_lock)
#define	CACHE_WUNLOCK()		rw_wunlock(&cache_lock)

/*
 * UMA zones for the VFS cache.
 *
 * The small cache is used for entries with short names, which are the
 * most common.  The large cache is used for entries which are too big to
 * fit in the small cache.
 */
static uma_zone_t cache_zone_small;
static uma_zone_t cache_zone_large;

#define	CACHE_PATH_CUTOFF	32
#define	CACHE_ZONE_SMALL	(sizeof(struct namecache) + CACHE_PATH_CUTOFF)
#define	CACHE_ZONE_LARGE	(sizeof(struct namecache) + NAME_MAX)

#define	cache_alloc(len)	uma_zalloc(((len) <= CACHE_PATH_CUTOFF) ? \
	cache_zone_small : cache_zone_large, M_WAITOK)
#define	cache_free(ncp)		do { \
	if (ncp != NULL) \
		uma_zfree(((ncp)->nc_nlen <= CACHE_PATH_CUTOFF) ? \
		    cache_zone_small : cache_zone_large, (ncp)); \
} while (0)

static int	doingcache = 1;		/* 1 => enable the cache */
SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "");

/* Export size information to userland */
SYSCTL_INT(_debug_sizeof, OID_AUTO, namecache, CTLFLAG_RD, 0,
    sizeof(struct namecache), "");

/*
 * The new name cache statistics
 */
static SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics");
#define	STATNODE(mode, name, var) \
	SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
STATNODE(CTLFLAG_RD, numneg, &numneg);
STATNODE(CTLFLAG_RD, numcache, &numcache);
static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls);
static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits);
static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits);
static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks);
static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss);
static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap);
static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps);
static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits);
static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps);
static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);
static u_long numupgrades; STATNODE(CTLFLAG_RD, numupgrades, &numupgrades);

SYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD | CTLFLAG_MPSAFE,
    &nchstats, sizeof(nchstats), "LU", "VFS cache effectiveness statistics");

static void cache_zap(struct namecache *ncp);
static int vn_vptocnp(struct vnode **vp, char **bp, char *buf, u_int *buflen);
static int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
    char *buf, char **retbuf, u_int buflen);

static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");

/*
 * Flags in namecache.nc_flag
 */
#define	NCF_WHITE	1

#ifdef DIAGNOSTIC
/*
 * Grab an atomic snapshot of the name cache hash chain lengths
 */
SYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats");

static int
sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
{
	int error;
	struct nchashhead *ncpp;
	struct namecache *ncp;
	int n_nchash;
	int count;

	n_nchash = nchash + 1;	/* nchash is max index, not count */
	if (!req->oldptr)
		return SYSCTL_OUT(req, 0, n_nchash * sizeof(int));

	/* Scan hash tables for applicable entries */
	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
		CACHE_RLOCK();
		count = 0;
		LIST_FOREACH(ncp, ncpp, nc_hash) {
			count++;
		}
		CACHE_RUNLOCK();
		error = SYSCTL_OUT(req, &count, sizeof(count));
		if (error)
			return (error);
	}
	return (0);
}
SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD|
    CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_rawnchash, "S,int",
    "nchash chain lengths");

static int
sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
{
	int error;
	struct nchashhead *ncpp;
	struct namecache *ncp;
	int n_nchash;
	int count, maxlength, used, pct;

	if (!req->oldptr)
		return SYSCTL_OUT(req, 0, 4 * sizeof(int));

	n_nchash = nchash + 1;	/* nchash is max index, not count */
	used = 0;
	maxlength = 0;

	/* Scan hash tables for applicable entries */
	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
		count = 0;
		CACHE_RLOCK();
		LIST_FOREACH(ncp, ncpp, nc_hash) {
			count++;
		}
		CACHE_RUNLOCK();
		if (count)
			used++;
		if (maxlength < count)
			maxlength = count;
	}
	n_nchash = nchash + 1;
	pct = (used * 100 * 100) / n_nchash;
	error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash));
	if (error)
		return (error);
	error = SYSCTL_OUT(req, &used, sizeof(used));
	if (error)
		return (error);
	error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength));
	if (error)
		return (error);
	error = SYSCTL_OUT(req, &pct, sizeof(pct));
	if (error)
		return (error);
	return (0);
}
SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD|
    CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_nchash, "I",
    "nchash chain lengths");
#endif

/*
 * cache_zap():
 *
 *   Removes a namecache entry from the cache, whether it contains an actual
 *   pointer to a vnode or is just a negative cache entry.
 */
static void
cache_zap(ncp)
	struct namecache *ncp;
{
	struct vnode *vp;

	rw_assert(&cache_lock, RA_WLOCKED);
	CTR2(KTR_VFS, "cache_zap(%p) vp %p", ncp, ncp->nc_vp);
	vp = NULL;
	LIST_REMOVE(ncp, nc_hash);
	LIST_REMOVE(ncp, nc_src);
	if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
		vp = ncp->nc_dvp;
		numcachehv--;
	}
	if (ncp->nc_vp) {
		TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
		ncp->nc_vp->v_dd = NULL;
	} else {
		TAILQ_REMOVE(&ncneg, ncp, nc_dst);
		numneg--;
	}
	numcache--;
	cache_free(ncp);
	if (vp)
		vdrop(vp);
}

/*
 * Lookup an entry in the cache
 *
 * Lookup is called with dvp pointing to the directory to search and
 * cnp pointing to the name of the entry being sought.  If the lookup
 * succeeds, the vnode is returned in *vpp, and a status of -1 is
 * returned.  If the lookup determines that the name does not exist
 * (negative caching), a status of ENOENT is returned.  If the lookup
 * fails, a status of zero is returned.  If the directory vnode is
 * recycled out from under us due to a forced unmount, a status of
 * EBADF is returned.
 *
 * vpp is locked and ref'd on return.  If we're looking up DOTDOT, dvp is
 * unlocked.  If we're looking up . an extra ref is taken, but the lock is
 * not recursively acquired.
 */
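/*
 * Usage sketch, illustrative only (it mirrors vfs_cache_lookup() below),
 * showing how a caller interprets the return value:
 *
 *	error = cache_lookup(dvp, vpp, cnp);
 *	if (error == 0)			// miss: ask the filesystem
 *		error = VOP_CACHEDLOOKUP(dvp, vpp, cnp);
 *	else if (error == -1)		// hit: *vpp is locked and referenced
 *		error = 0;
 *	// ENOENT: cached negative entry; EBADF: dvp recycled by forced unmount
 */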
int
cache_lookup(dvp, vpp, cnp)
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
{
	struct namecache *ncp;
	u_int32_t hash;
	int error, ltype, wlocked;

	if (!doingcache) {
		cnp->cn_flags &= ~MAKEENTRY;
		return (0);
	}
retry:
	CACHE_RLOCK();
	wlocked = 0;
	numcalls++;
	error = 0;

retry_wlocked:
	if (cnp->cn_nameptr[0] == '.') {
		if (cnp->cn_namelen == 1) {
			*vpp = dvp;
			CTR2(KTR_VFS, "cache_lookup(%p, %s) found via .",
			    dvp, cnp->cn_nameptr);
			dothits++;
			goto success;
		}
		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
			dotdothits++;
			if (dvp->v_dd == NULL ||
			    (cnp->cn_flags & MAKEENTRY) == 0) {
				goto unlock;
			}
			*vpp = dvp->v_dd;
			CTR3(KTR_VFS, "cache_lookup(%p, %s) found %p via ..",
			    dvp, cnp->cn_nameptr, *vpp);
			goto success;
		}
	}

	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
	hash = fnv_32_buf(&dvp, sizeof(dvp), hash);
	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
		numchecks++;
		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
			break;
	}

	/* We failed to find an entry */
	if (ncp == NULL) {
		if ((cnp->cn_flags & MAKEENTRY) == 0) {
			nummisszap++;
		} else {
			nummiss++;
		}
		nchstats.ncs_miss++;
		goto unlock;
	}

	/* We don't want to have an entry, so dump it */
	if ((cnp->cn_flags & MAKEENTRY) == 0) {
		numposzaps++;
		nchstats.ncs_badhits++;
		if (!wlocked && !CACHE_UPGRADE_LOCK())
			goto wlock;
		cache_zap(ncp);
		CACHE_WUNLOCK();
		return (0);
	}

	/* We found a "positive" match, return the vnode */
	if (ncp->nc_vp) {
		numposhits++;
		nchstats.ncs_goodhits++;
		*vpp = ncp->nc_vp;
		CTR4(KTR_VFS, "cache_lookup(%p, %s) found %p via ncp %p",
		    dvp, cnp->cn_nameptr, *vpp, ncp);
		goto success;
	}

	/* We found a negative match, and want to create it, so purge */
	if (cnp->cn_nameiop == CREATE) {
		numnegzaps++;
		nchstats.ncs_badhits++;
		if (!wlocked && !CACHE_UPGRADE_LOCK())
			goto wlock;
		cache_zap(ncp);
		CACHE_WUNLOCK();
		return (0);
	}

	if (!wlocked && !CACHE_UPGRADE_LOCK())
		goto wlock;
	numneghits++;
	/*
	 * We found a "negative" match, so we shift it to the end of
	 * the "negative" cache entries queue to satisfy LRU.  Also,
	 * check to see if the entry is a whiteout; indicate this to
	 * the componentname, if so.
	 */
	TAILQ_REMOVE(&ncneg, ncp, nc_dst);
	TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
	nchstats.ncs_neghits++;
	if (ncp->nc_flag & NCF_WHITE)
		cnp->cn_flags |= ISWHITEOUT;
	CACHE_WUNLOCK();
	return (ENOENT);

wlock:
	/*
	 * We need to update the cache after our lookup, so upgrade to
	 * a write lock and retry the operation.
	 */
	CACHE_RUNLOCK();
	CACHE_WLOCK();
	numupgrades++;
	wlocked = 1;
	goto retry_wlocked;

success:
	/*
	 * On success we return a locked and ref'd vnode as per the lookup
	 * protocol.
	 */
	if (dvp == *vpp) {			/* lookup on "." */
		VREF(*vpp);
		if (wlocked)
			CACHE_WUNLOCK();
		else
			CACHE_RUNLOCK();
		/*
		 * When we lookup "." we still can be asked to lock it
		 * differently...
		 */
		ltype = cnp->cn_lkflags & LK_TYPE_MASK;
		if (ltype != VOP_ISLOCKED(*vpp)) {
			if (ltype == LK_EXCLUSIVE) {
				vn_lock(*vpp, LK_UPGRADE | LK_RETRY);
				if ((*vpp)->v_iflag & VI_DOOMED) {
					/* forced unmount */
					vrele(*vpp);
					*vpp = NULL;
					return (EBADF);
				}
			} else
				vn_lock(*vpp, LK_DOWNGRADE | LK_RETRY);
		}
		return (-1);
	}
	ltype = 0;	/* silence gcc warning */
	if (cnp->cn_flags & ISDOTDOT) {
		ltype = VOP_ISLOCKED(dvp);
		VOP_UNLOCK(dvp, 0);
	}
	VI_LOCK(*vpp);
	if (wlocked)
		CACHE_WUNLOCK();
	else
		CACHE_RUNLOCK();
	error = vget(*vpp, cnp->cn_lkflags | LK_INTERLOCK, cnp->cn_thread);
	if (cnp->cn_flags & ISDOTDOT)
		vn_lock(dvp, ltype | LK_RETRY);
	if (error) {
		*vpp = NULL;
		goto retry;
	}
	if ((cnp->cn_flags & ISLASTCN) &&
	    (cnp->cn_lkflags & LK_TYPE_MASK) == LK_EXCLUSIVE) {
		ASSERT_VOP_ELOCKED(*vpp, "cache_lookup");
	}
	return (-1);

unlock:
	if (wlocked)
		CACHE_WUNLOCK();
	else
		CACHE_RUNLOCK();
	return (0);
}

/*
 * Add an entry to the cache.
 */
void
cache_enter(dvp, vp, cnp)
	struct vnode *dvp;
	struct vnode *vp;
	struct componentname *cnp;
{
	struct namecache *ncp, *n2;
	struct nchashhead *ncpp;
	u_int32_t hash;
	int hold;
	int zap;
	int len;

	CTR3(KTR_VFS, "cache_enter(%p, %p, %s)", dvp, vp, cnp->cn_nameptr);
	VNASSERT(vp == NULL || (vp->v_iflag & VI_DOOMED) == 0, vp,
	    ("cache_enter: Adding a doomed vnode"));

	if (!doingcache)
		return;

	/*
	 * Avoid blowout in namecache entries.
	 */
	if (numcache >= desiredvnodes * 2)
		return;

	if (cnp->cn_nameptr[0] == '.') {
		if (cnp->cn_namelen == 1) {
			return;
		}
		/*
		 * For dotdot lookups only cache the v_dd pointer if the
		 * directory has a link back to its parent via v_cache_dst.
		 * Without this an unlinked directory would keep a soft
		 * reference to its parent which could not be NULLed at
		 * cache_purge() time.
		 */
		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
			CACHE_WLOCK();
			if (!TAILQ_EMPTY(&dvp->v_cache_dst))
				dvp->v_dd = vp;
			CACHE_WUNLOCK();
			return;
		}
	}

	hold = 0;
	zap = 0;

	/*
	 * Calculate the hash key and set up as much of the new
	 * namecache entry as possible before acquiring the lock.
	 */
	ncp = cache_alloc(cnp->cn_namelen);
	ncp->nc_vp = vp;
	ncp->nc_dvp = dvp;
	len = ncp->nc_nlen = cnp->cn_namelen;
	hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
	bcopy(cnp->cn_nameptr, ncp->nc_name, len);
	hash = fnv_32_buf(&dvp, sizeof(dvp), hash);
	CACHE_WLOCK();

	/*
	 * See if this vnode or negative entry is already in the cache
	 * with this name.  This can happen with concurrent lookups of
	 * the same path name.
	 */
	ncpp = NCHHASH(hash);
	LIST_FOREACH(n2, ncpp, nc_hash) {
		if (n2->nc_dvp == dvp &&
		    n2->nc_nlen == cnp->cn_namelen &&
		    !bcmp(n2->nc_name, cnp->cn_nameptr, n2->nc_nlen)) {
			CACHE_WUNLOCK();
			cache_free(ncp);
			return;
		}
	}

	numcache++;
	if (!vp) {
		numneg++;
		ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0;
	} else if (vp->v_type == VDIR) {
		vp->v_dd = dvp;
	} else {
		vp->v_dd = NULL;
	}

	/*
	 * Insert the new namecache entry into the appropriate chain
	 * within the cache entries table.
	 */
	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
	if (LIST_EMPTY(&dvp->v_cache_src)) {
		hold = 1;
		numcachehv++;
	}
	LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
	/*
	 * If the entry is "negative", we place it into the
	 * "negative" cache queue, otherwise, we place it into the
	 * destination vnode's cache entries queue.
	 */
	if (vp) {
		TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
	} else {
		TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
	}
	if (numneg * ncnegfactor > numcache) {
		ncp = TAILQ_FIRST(&ncneg);
		zap = 1;
	}
	if (hold)
		vhold(dvp);
	if (zap)
		cache_zap(ncp);
	CACHE_WUNLOCK();
}
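/*
 * Illustrative note (a sketch of typical usage, not part of this file's
 * logic): a filesystem's lookup routine generally calls cache_enter() once
 * it has resolved a component itself, e.g.
 *
 *	if (cnp->cn_flags & MAKEENTRY)
 *		cache_enter(dvp, vp, cnp);	// vp == NULL records a negative entry
 *
 * so that later lookups of the same (dvp, name) pair can be answered by
 * cache_lookup() above.
 */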

/*
 * Name cache initialization, from vfs_init() when we are booting
 */
static void
nchinit(void *dummy __unused)
{

	TAILQ_INIT(&ncneg);

	cache_zone_small = uma_zcreate("S VFS Cache", CACHE_ZONE_SMALL, NULL,
	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
	cache_zone_large = uma_zcreate("L VFS Cache", CACHE_ZONE_LARGE, NULL,
	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);

	nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
}
SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL);

/*
 * Invalidate all entries to a particular vnode.
 */
void
cache_purge(vp)
	struct vnode *vp;
{

	CTR1(KTR_VFS, "cache_purge(%p)", vp);
	CACHE_WLOCK();
	while (!LIST_EMPTY(&vp->v_cache_src))
		cache_zap(LIST_FIRST(&vp->v_cache_src));
	while (!TAILQ_EMPTY(&vp->v_cache_dst))
		cache_zap(TAILQ_FIRST(&vp->v_cache_dst));
	vp->v_dd = NULL;
	CACHE_WUNLOCK();
}

/*
 * Invalidate all negative entries for a particular directory vnode.
 */
void
cache_purge_negative(vp)
	struct vnode *vp;
{
	struct namecache *cp, *ncp;

	CTR1(KTR_VFS, "cache_purge_negative(%p)", vp);
	CACHE_WLOCK();
	LIST_FOREACH_SAFE(cp, &vp->v_cache_src, nc_src, ncp) {
		if (cp->nc_vp == NULL)
			cache_zap(cp);
	}
	CACHE_WUNLOCK();
}

/*
 * Flush all entries referencing a particular filesystem.
 */
void
cache_purgevfs(mp)
	struct mount *mp;
{
	struct nchashhead *ncpp;
	struct namecache *ncp, *nnp;

	/* Scan hash tables for applicable entries */
	CACHE_WLOCK();
	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
		LIST_FOREACH_SAFE(ncp, ncpp, nc_hash, nnp) {
			if (ncp->nc_dvp->v_mount == mp)
				cache_zap(ncp);
		}
	}
	CACHE_WUNLOCK();
}

/*
 * Perform canonical checks and cache lookup and pass on to the filesystem
 * through vop_cachedlookup only if needed.
 */
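/*
 * Illustrative note (an assumption based on how in-tree filesystems such as
 * UFS are typically wired up): a filesystem uses this routine by pointing
 * its vop_lookup entry at it and supplying the real work in
 * vop_cachedlookup, e.g.
 *
 *	.vop_lookup =		vfs_cache_lookup,
 *	.vop_cachedlookup =	xxx_lookup,	// hypothetical fs-specific lookup
 *
 * so that the name cache is consulted before the filesystem is asked.
 */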

int
vfs_cache_lookup(ap)
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{
	struct vnode *dvp;
	int error;
	struct vnode **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	struct ucred *cred = cnp->cn_cred;
	int flags = cnp->cn_flags;
	struct thread *td = cnp->cn_thread;

	*vpp = NULL;
	dvp = ap->a_dvp;

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);

	error = VOP_ACCESS(dvp, VEXEC, cred, td);
	if (error)
		return (error);

	error = cache_lookup(dvp, vpp, cnp);
	if (error == 0)
		return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
	if (error == -1)
		return (0);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct __getcwd_args {
	u_char	*buf;
	u_int	buflen;
};
#endif

/*
 * XXX All of these sysctls would probably be more productive dead.
 */
static int disablecwd;
SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0,
    "Disable the getcwd syscall");

/* Implementation of the getcwd syscall. */
int
__getcwd(td, uap)
	struct thread *td;
	struct __getcwd_args *uap;
{

	return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen));
}

int
kern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg, u_int buflen)
{
	char *bp, *tmpbuf;
	struct filedesc *fdp;
	struct vnode *cdir, *rdir;
	int error, vfslocked;

	if (disablecwd)
		return (ENODEV);
	if (buflen < 2)
		return (EINVAL);
	if (buflen > MAXPATHLEN)
		buflen = MAXPATHLEN;

	tmpbuf = malloc(buflen, M_TEMP, M_WAITOK);
	fdp = td->td_proc->p_fd;
	FILEDESC_SLOCK(fdp);
	cdir = fdp->fd_cdir;
	VREF(cdir);
	rdir = fdp->fd_rdir;
	VREF(rdir);
	FILEDESC_SUNLOCK(fdp);
	error = vn_fullpath1(td, cdir, rdir, tmpbuf, &bp, buflen);
	vfslocked = VFS_LOCK_GIANT(rdir->v_mount);
	vrele(rdir);
	VFS_UNLOCK_GIANT(vfslocked);
	vfslocked = VFS_LOCK_GIANT(cdir->v_mount);
	vrele(cdir);
	VFS_UNLOCK_GIANT(vfslocked);

	if (!error) {
		if (bufseg == UIO_SYSSPACE)
			bcopy(bp, buf, strlen(bp) + 1);
		else
			error = copyout(bp, buf, strlen(bp) + 1);
	}
	free(tmpbuf, M_TEMP);
	return (error);
}

/*
 * Thus begins the fullpath magic.
 */

#undef STATNODE
#define	STATNODE(name)							\
	static u_int name;						\
	SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "")

static int disablefullpath;
SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0,
    "Disable the vn_fullpath function");

/* These count for kern___getcwd(), too. */
STATNODE(numfullpathcalls);
STATNODE(numfullpathfail1);
STATNODE(numfullpathfail2);
STATNODE(numfullpathfail4);
STATNODE(numfullpathfound);

/*
 * Retrieve the full filesystem path that corresponds to a vnode from the name
 * cache (if available).
 */
int
vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf)
{
	char *buf;
	struct filedesc *fdp;
	struct vnode *rdir;
	int error, vfslocked;

	if (disablefullpath)
		return (ENODEV);
	if (vn == NULL)
		return (EINVAL);

	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
	fdp = td->td_proc->p_fd;
	FILEDESC_SLOCK(fdp);
	rdir = fdp->fd_rdir;
	VREF(rdir);
	FILEDESC_SUNLOCK(fdp);
	error = vn_fullpath1(td, vn, rdir, buf, retbuf, MAXPATHLEN);
	vfslocked = VFS_LOCK_GIANT(rdir->v_mount);
	vrele(rdir);
	VFS_UNLOCK_GIANT(vfslocked);

	if (!error)
		*freebuf = buf;
	else
		free(buf, M_TEMP);
	return (error);
}

/*
 * This function is similar to vn_fullpath, but it attempts to look up the
 * pathname relative to the global root mount point.  This is required for the
 * auditing sub-system, as audited pathnames must be absolute, relative to the
 * global root mount point.
 */
int
vn_fullpath_global(struct thread *td, struct vnode *vn,
    char **retbuf, char **freebuf)
{
	char *buf;
	int error;

	if (disablefullpath)
		return (ENODEV);
	if (vn == NULL)
		return (EINVAL);
	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
	error = vn_fullpath1(td, vn, rootvnode, buf, retbuf, MAXPATHLEN);
	if (!error)
		*freebuf = buf;
	else
		free(buf, M_TEMP);
	return (error);
}

static int
vn_vptocnp(struct vnode **vp, char **bp, char *buf, u_int *buflen)
{
	struct vnode *dvp;
	int error, vfslocked;

	vhold(*vp);
	CACHE_RUNLOCK();
	vfslocked = VFS_LOCK_GIANT((*vp)->v_mount);
	vn_lock(*vp, LK_SHARED | LK_RETRY);
	error = VOP_VPTOCNP(*vp, &dvp, buf, buflen);
	VOP_UNLOCK(*vp, 0);
	vdrop(*vp);
	VFS_UNLOCK_GIANT(vfslocked);
	if (error) {
		numfullpathfail2++;
		return (error);
	}
	*bp = buf + *buflen;
	*vp = dvp;
	CACHE_RLOCK();
	if ((*vp)->v_iflag & VI_DOOMED) {
		/* forced unmount */
		CACHE_RUNLOCK();
		vdrop(*vp);
		return (ENOENT);
	}
	vdrop(*vp);

	return (0);
}

/*
 * The magic behind kern___getcwd() and vn_fullpath().
 */
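/*
 * Descriptive note: vn_fullpath1() builds the path backwards, from the end
 * of buf toward its start.  Starting at vp, it prepends one name component
 * per iteration (taken from the vnode's v_cache_dst entry when cached,
 * otherwise obtained via vn_vptocnp()/VOP_VPTOCNP()), crosses mount points
 * through mnt_vnodecovered, and stops at rdir or rootvnode.  On success,
 * *retbuf points into buf at the first character of the assembled path.
 */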
static int
vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
    char *buf, char **retbuf, u_int buflen)
{
	char *bp;
	int error, i, slash_prefixed;
	struct namecache *ncp;

	buflen--;
	bp = buf + buflen;
	*bp = '\0';
	error = 0;
	slash_prefixed = 0;

	CACHE_RLOCK();
	numfullpathcalls++;
	if (vp->v_type != VDIR) {
		ncp = TAILQ_FIRST(&vp->v_cache_dst);
		if (ncp != NULL) {
			for (i = ncp->nc_nlen - 1; i >= 0 && bp > buf; i--)
				*--bp = ncp->nc_name[i];
			if (bp == buf) {
				numfullpathfail4++;
				CACHE_RUNLOCK();
				return (ENOMEM);
			}
			vp = ncp->nc_dvp;
		} else {
			error = vn_vptocnp(&vp, &bp, buf, &buflen);
			if (error) {
				return (error);
			}
		}
		*--bp = '/';
		buflen--;
		if (buflen < 0) {
			numfullpathfail4++;
			CACHE_RUNLOCK();
			return (ENOMEM);
		}
		slash_prefixed = 1;
	}
	while (vp != rdir && vp != rootvnode) {
		if (vp->v_vflag & VV_ROOT) {
			if (vp->v_iflag & VI_DOOMED) {	/* forced unmount */
				CACHE_RUNLOCK();
				error = EBADF;
				break;
			}
			vp = vp->v_mount->mnt_vnodecovered;
			continue;
		}
		if (vp->v_type != VDIR) {
			numfullpathfail1++;
			CACHE_RUNLOCK();
			error = ENOTDIR;
			break;
		}
		ncp = TAILQ_FIRST(&vp->v_cache_dst);
		if (ncp != NULL) {
			MPASS(vp->v_dd == NULL || ncp->nc_dvp == vp->v_dd);
			buflen -= ncp->nc_nlen - 1;
			for (i = ncp->nc_nlen - 1; i >= 0 && bp != buf; i--)
				*--bp = ncp->nc_name[i];
			if (bp == buf) {
				numfullpathfail4++;
				CACHE_RUNLOCK();
				error = ENOMEM;
				break;
			}
			vp = ncp->nc_dvp;
		} else {
			error = vn_vptocnp(&vp, &bp, buf, &buflen);
			if (error) {
				break;
			}
		}
		*--bp = '/';
		buflen--;
		if (buflen < 0) {
			numfullpathfail4++;
			CACHE_RUNLOCK();
			error = ENOMEM;
			break;
		}
		slash_prefixed = 1;
	}
	if (error)
		return (error);
	if (!slash_prefixed) {
		if (bp == buf) {
			numfullpathfail4++;
			CACHE_RUNLOCK();
			return (ENOMEM);
		} else {
			*--bp = '/';
		}
	}
	numfullpathfound++;
	CACHE_RUNLOCK();

	*retbuf = bp;
	return (0);
}

int
vn_commname(struct vnode *vp, char *buf, u_int buflen)
{
	struct namecache *ncp;
	int l;

	CACHE_RLOCK();
	ncp = TAILQ_FIRST(&vp->v_cache_dst);
	if (!ncp) {
		CACHE_RUNLOCK();
		return (ENOENT);
	}
	l = min(ncp->nc_nlen, buflen - 1);
	memcpy(buf, ncp->nc_name, l);
	CACHE_RUNLOCK();
	buf[l] = '\0';
	return (0);
}