/*-
 * Copyright (c) 1989, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Poul-Henning Kamp of the FreeBSD Project.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_cache.c	8.5 (Berkeley) 3/22/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/syscallsubr.h>
#include <sys/sysproto.h>
#include <sys/proc.h>
#include <sys/filedesc.h>
#include <sys/fnv_hash.h>

#include <vm/uma.h>

/*
 * This structure describes the elements in the cache of recent
 * names looked up by namei.
 */

struct namecache {
	LIST_ENTRY(namecache) nc_hash;	/* hash chain */
	LIST_ENTRY(namecache) nc_src;	/* source vnode list */
	TAILQ_ENTRY(namecache) nc_dst;	/* destination vnode list */
	struct vnode *nc_dvp;		/* vnode of parent of name */
	struct vnode *nc_vp;		/* vnode the name refers to */
	u_char nc_flag;			/* flag bits */
	u_char nc_nlen;			/* length of name */
	char nc_name[0];		/* segment name */
};

/*
 * Name caching works as follows:
 *
 * Names found by directory scans are retained in a cache
 * for future reference.  The cache is managed LRU, so frequently
 * used names will hang around.  The cache is indexed by a hash value
 * obtained from (vp, name), where vp refers to the directory
 * containing name.
 *
 * If it is a "negative" entry (i.e. for a name that is known NOT to
 * exist), the vnode pointer will be NULL.
 *
 * Upon reaching the last segment of a path, if the reference
 * is for DELETE, or NOCACHE is set (rewrite), and the
 * name is located in the cache, it will be dropped.
 */
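/*
 * Editor's sketch (not part of the original file): the cache key is
 * an FNV-1 hash of the name component seeded into the parent
 * directory's v_id, exactly as cache_lookup() and cache_enter()
 * compute it below.  Disabled; for exposition only.
 */
#if 0
static u_int32_t
cache_hash_sketch(struct vnode *dvp, struct componentname *cnp)
{
	u_int32_t hash;

	/* Hash the name, then fold in the parent directory's identity. */
	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
	return (hash);	/* NCHHASH(hash) below then selects the chain */
}
#endif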
/*
 * Structures associated with name caching.
 */
#define NCHHASH(hash) \
	(&nchashtbl[(hash) & nchash])
static LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
static TAILQ_HEAD(, namecache) ncneg;	/* LRU queue of negative entries */
static u_long	nchash;			/* size of hash table */
SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");
static u_long	ncnegfactor = 16;	/* ratio of negative entries */
SYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");
static u_long	numneg;			/* number of negative entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");
static u_long	numcache;		/* number of cache entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");
static u_long	numcachehv;		/* number of cache entries with vnodes held */
SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, "");
#if 0
static u_long	numcachepl;		/* number of cache purge for leaf entries */
SYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, "");
#endif
struct nchstats nchstats;		/* cache effectiveness statistics */

static struct mtx cache_lock;
MTX_SYSINIT(vfscache, &cache_lock, "Name Cache", MTX_DEF);

#define	CACHE_LOCK()	mtx_lock(&cache_lock)
#define	CACHE_UNLOCK()	mtx_unlock(&cache_lock)

/*
 * UMA zones for the VFS cache.
 *
 * The small cache is used for entries with short names, which are the
 * most common.  The large cache is used for entries which are too big to
 * fit in the small cache.
 */
static uma_zone_t cache_zone_small;
static uma_zone_t cache_zone_large;

#define	CACHE_PATH_CUTOFF	32
#define	CACHE_ZONE_SMALL	(sizeof(struct namecache) + CACHE_PATH_CUTOFF)
#define	CACHE_ZONE_LARGE	(sizeof(struct namecache) + NAME_MAX)

#define	cache_alloc(len)	uma_zalloc(((len) <= CACHE_PATH_CUTOFF) ? \
	cache_zone_small : cache_zone_large, M_WAITOK)
#define	cache_free(ncp)		do { \
	if (ncp != NULL) \
		uma_zfree(((ncp)->nc_nlen <= CACHE_PATH_CUTOFF) ? \
		    cache_zone_small : cache_zone_large, (ncp)); \
	} while (0)
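/*
 * Editor's note (illustrative, not in the original): with
 * CACHE_PATH_CUTOFF at 32, an entry for "kernel" (6 bytes) comes from
 * cache_zone_small, whose items are sizeof(struct namecache) + 32
 * bytes; a 40-byte name falls through to cache_zone_large, which is
 * sized for names up to NAME_MAX.
 */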
static int	doingcache = 1;		/* 1 => enable the cache */
SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "");

/* Export size information to userland */
SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), "");
SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), "");

/*
 * The new name cache statistics
 */
static SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics");
#define STATNODE(mode, name, var) \
	SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
STATNODE(CTLFLAG_RD, numneg, &numneg);
STATNODE(CTLFLAG_RD, numcache, &numcache);
static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls);
static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits);
static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits);
static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks);
static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss);
static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap);
static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps);
static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits);
static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps);
static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);

SYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD, &nchstats,
    sizeof(nchstats), "LU", "VFS cache effectiveness statistics");

static void cache_zap(struct namecache *ncp);

static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");

/*
 * Flags in namecache.nc_flag
 */
#define NCF_WHITE	1

/*
 * Grab an atomic snapshot of the name cache hash chain lengths
 */
SYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats");

static int
sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
{
	int error;
	struct nchashhead *ncpp;
	struct namecache *ncp;
	int n_nchash;
	int count;

	n_nchash = nchash + 1;	/* nchash is max index, not count */
	if (!req->oldptr)
		return SYSCTL_OUT(req, 0, n_nchash * sizeof(int));

	/* Scan hash tables for applicable entries */
	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
		count = 0;
		LIST_FOREACH(ncp, ncpp, nc_hash) {
			count++;
		}
		error = SYSCTL_OUT(req, &count, sizeof(count));
		if (error)
			return (error);
	}
	return (0);
}
SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD,
    0, 0, sysctl_debug_hashstat_rawnchash, "S,int", "nchash chain lengths");

static int
sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
{
	int error;
	struct nchashhead *ncpp;
	struct namecache *ncp;
	int n_nchash;
	int count, maxlength, used, pct;

	if (!req->oldptr)
		return SYSCTL_OUT(req, 0, 4 * sizeof(int));

	n_nchash = nchash + 1;	/* nchash is max index, not count */
	used = 0;
	maxlength = 0;

	/* Scan hash tables for applicable entries */
	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
		count = 0;
		LIST_FOREACH(ncp, ncpp, nc_hash) {
			count++;
		}
		if (count)
			used++;
		if (maxlength < count)
			maxlength = count;
	}
	n_nchash = nchash + 1;
	/* Bucket utilization in hundredths of a percent. */
	pct = (used * 100 * 100) / n_nchash;
	error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash));
	if (error)
		return (error);
	error = SYSCTL_OUT(req, &used, sizeof(used));
	if (error)
		return (error);
	error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength));
	if (error)
		return (error);
	error = SYSCTL_OUT(req, &pct, sizeof(pct));
	if (error)
		return (error);
	return (0);
}
SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD,
    0, 0, sysctl_debug_hashstat_nchash, "I", "nchash chain lengths");

/*
 * cache_zap():
 *
 * Removes a namecache entry from the cache, whether it holds an actual
 * pointer to a vnode or is just a negative cache entry.
 */
static void
cache_zap(ncp)
	struct namecache *ncp;
{
	struct vnode *vp;

	mtx_assert(&cache_lock, MA_OWNED);
	vp = NULL;
	LIST_REMOVE(ncp, nc_hash);
	LIST_REMOVE(ncp, nc_src);
	if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
		vp = ncp->nc_dvp;
		numcachehv--;
	}
	if (ncp->nc_vp) {
		TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
	} else {
		TAILQ_REMOVE(&ncneg, ncp, nc_dst);
		numneg--;
	}
	numcache--;
	cache_free(ncp);
	if (vp)
		vdrop(vp);
}

/*
 * cache_leaf_test()
 *
 * Test whether this (directory) vnode's namei cache entry contains
 * subdirectories or not.  Used to determine whether the directory is
 * a leaf in the namei cache or not.  Note: the directory may still
 * contain files in the namei cache.
 *
 * Returns 0 if the directory is a leaf, -1 if it isn't.
 */
int
cache_leaf_test(struct vnode *vp)
{
	struct namecache *ncpc;
	int leaf;

	leaf = 0;
	CACHE_LOCK();
	for (ncpc = LIST_FIRST(&vp->v_cache_src);
	    ncpc != NULL;
	    ncpc = LIST_NEXT(ncpc, nc_src)) {
		if (ncpc->nc_vp != NULL && ncpc->nc_vp->v_type == VDIR) {
			leaf = -1;
			break;
		}
	}
	CACHE_UNLOCK();
	return (leaf);
}
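/*
 * Editor's sketch (not part of the original file): a caller decodes
 * cache_lookup()'s three-way result roughly the way vfs_cache_lookup()
 * does further below:
 *
 *	error = cache_lookup(dvp, vpp, cnp);
 *	if (error == -1)	positive hit: *vpp is the cached vnode
 *	if (error == ENOENT)	negative hit: the name is known absent
 *	if (error == 0)		miss: fall back to VOP_CACHEDLOOKUP()
 */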
/*
 * Lookup an entry in the cache
 *
 * Lookup is called with dvp pointing to the directory to search,
 * cnp pointing to the name of the entry being sought.  If the lookup
 * succeeds, the vnode is returned in *vpp, and a status of -1 is
 * returned.  If the lookup determines that the name does not exist
 * (negative caching), a status of ENOENT is returned.  If no entry
 * is found in the cache, a status of zero is returned.
 */
int
cache_lookup(dvp, vpp, cnp)
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
{
	struct namecache *ncp;
	u_int32_t hash;

	if (!doingcache) {
		cnp->cn_flags &= ~MAKEENTRY;
		return (0);
	}

	CACHE_LOCK();
	numcalls++;

	if (cnp->cn_nameptr[0] == '.') {
		if (cnp->cn_namelen == 1) {
			*vpp = dvp;
			dothits++;
			CACHE_UNLOCK();
			return (-1);
		}
		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
			dotdothits++;
			if (dvp->v_dd->v_id != dvp->v_ddid ||
			    (cnp->cn_flags & MAKEENTRY) == 0) {
				dvp->v_ddid = 0;
				CACHE_UNLOCK();
				return (0);
			}
			*vpp = dvp->v_dd;
			CACHE_UNLOCK();
			return (-1);
		}
	}

	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
		numchecks++;
		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
			break;
	}

	/* We failed to find an entry */
	if (ncp == 0) {
		if ((cnp->cn_flags & MAKEENTRY) == 0) {
			nummisszap++;
		} else {
			nummiss++;
		}
		nchstats.ncs_miss++;
		CACHE_UNLOCK();
		return (0);
	}

	/* We don't want to have an entry, so dump it */
	if ((cnp->cn_flags & MAKEENTRY) == 0) {
		numposzaps++;
		nchstats.ncs_badhits++;
		cache_zap(ncp);
		CACHE_UNLOCK();
		return (0);
	}

	/* We found a "positive" match, return the vnode */
	if (ncp->nc_vp) {
		numposhits++;
		nchstats.ncs_goodhits++;
		*vpp = ncp->nc_vp;
		CACHE_UNLOCK();
		return (-1);
	}

	/* We found a negative match, and want to create it, so purge */
	if (cnp->cn_nameiop == CREATE) {
		numnegzaps++;
		nchstats.ncs_badhits++;
		cache_zap(ncp);
		CACHE_UNLOCK();
		return (0);
	}

	numneghits++;
	/*
	 * We found a "negative" match, so we shift it to the end of
	 * the "negative" cache entries queue to satisfy LRU.  Also,
	 * check to see if the entry is a whiteout; indicate this to
	 * the componentname, if so.
	 */
	TAILQ_REMOVE(&ncneg, ncp, nc_dst);
	TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
	nchstats.ncs_neghits++;
	if (ncp->nc_flag & NCF_WHITE)
		cnp->cn_flags |= ISWHITEOUT;
	CACHE_UNLOCK();
	return (ENOENT);
}

/*
 * Add an entry to the cache.
 */
void
cache_enter(dvp, vp, cnp)
	struct vnode *dvp;
	struct vnode *vp;
	struct componentname *cnp;
{
	struct namecache *ncp;
	struct nchashhead *ncpp;
	u_int32_t hash;
	int hold;
	int zap;
	int len;

	if (!doingcache)
		return;

	if (cnp->cn_nameptr[0] == '.') {
		if (cnp->cn_namelen == 1) {
			return;
		}
		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
			if (vp) {
				dvp->v_dd = vp;
				dvp->v_ddid = vp->v_id;
			} else {
				dvp->v_dd = dvp;
				dvp->v_ddid = 0;
			}
			return;
		}
	}

	hold = 0;
	zap = 0;
	ncp = cache_alloc(cnp->cn_namelen);
	CACHE_LOCK();
	numcache++;
	if (!vp) {
		numneg++;
		ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0;
	} else if (vp->v_type == VDIR) {
		vp->v_dd = dvp;
		vp->v_ddid = dvp->v_id;
	}

	/*
	 * Set the rest of the namecache entry elements, calculate its
	 * hash key and insert it into the appropriate chain within
	 * the cache entries table.
	 */
	ncp->nc_vp = vp;
	ncp->nc_dvp = dvp;
	len = ncp->nc_nlen = cnp->cn_namelen;
	hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
	bcopy(cnp->cn_nameptr, ncp->nc_name, len);
	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
	ncpp = NCHHASH(hash);
	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
	if (LIST_EMPTY(&dvp->v_cache_src)) {
		hold = 1;
		numcachehv++;
	}
	LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
	/*
	 * If the entry is "negative", we place it into the
	 * "negative" cache queue, otherwise, we place it into the
	 * destination vnode's cache entries queue.
	 */
	if (vp) {
		TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
	} else {
		TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
	}
	/*
	 * Hold negative entries to at most 1/ncnegfactor of the cache;
	 * past that, evict the least recently used negative entry.
	 */
	if (numneg * ncnegfactor > numcache) {
		ncp = TAILQ_FIRST(&ncneg);
		zap = 1;
	}
	if (hold)
		vhold(dvp);
	if (zap)
		cache_zap(ncp);
	CACHE_UNLOCK();
}

/*
 * Name cache initialization, from vfs_init() when we are booting
 */
static void
nchinit(void *dummy __unused)
{

	TAILQ_INIT(&ncneg);

	cache_zone_small = uma_zcreate("S VFS Cache", CACHE_ZONE_SMALL, NULL,
	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
	cache_zone_large = uma_zcreate("L VFS Cache", CACHE_ZONE_LARGE, NULL,
	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);

	nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
}
SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL)

/*
 * Invalidate all entries to a particular vnode.
 *
 * Remove all entries in the namecache relating to this vnode and
 * change the v_id.  We take the v_id from a global counter, since
 * it becomes a handy sequence number in crash-dumps that way.
 * No valid vnode will ever have (v_id == 0).
 *
 * XXX: Only time and the size of v_id prevents this from failing:
 * XXX: In theory we should hunt down all (struct vnode*, v_id)
 * XXX: soft references and nuke them, at least on the global
 * XXX: v_id wraparound.  The period of resistance can be extended
 * XXX: by incrementing each vnode's v_id individually instead of
 * XXX: using the global v_id.
 */

/*
 * XXX This is sometimes called when a vnode may still be re-used, in which
 * case v_dd may be invalid.  Need to look this up.
 */
void
cache_purge(vp)
	struct vnode *vp;
{
	static u_long nextid;

	CACHE_LOCK();
	while (!LIST_EMPTY(&vp->v_cache_src))
		cache_zap(LIST_FIRST(&vp->v_cache_src));
	while (!TAILQ_EMPTY(&vp->v_cache_dst))
		cache_zap(TAILQ_FIRST(&vp->v_cache_dst));

	do
		nextid++;
	while (nextid == vp->v_id || !nextid);
	vp->v_id = nextid;
	vp->v_dd = vp;
	vp->v_ddid = 0;
	CACHE_UNLOCK();
}
/*
 * Flush all entries referencing a particular filesystem.
 *
 * Since we need to check it anyway, we will flush all the invalid
 * entries at the same time.
 */
void
cache_purgevfs(mp)
	struct mount *mp;
{
	struct nchashhead *ncpp;
	struct namecache *ncp, *nnp;
	struct nchashhead mplist;

	LIST_INIT(&mplist);
	ncp = NULL;

	/* Scan hash tables for applicable entries */
	CACHE_LOCK();
	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
		for (ncp = LIST_FIRST(ncpp); ncp != 0; ncp = nnp) {
			nnp = LIST_NEXT(ncp, nc_hash);
			if (ncp->nc_dvp->v_mount == mp) {
				LIST_REMOVE(ncp, nc_hash);
				LIST_INSERT_HEAD(&mplist, ncp, nc_hash);
			}
		}
	}
	while (!LIST_EMPTY(&mplist))
		cache_zap(LIST_FIRST(&mplist));
	CACHE_UNLOCK();
}

/*
 * Perform canonical checks and cache lookup and pass on to filesystem
 * through the vop_cachedlookup only if needed.
 */
int
vfs_cache_lookup(ap)
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{
	struct vnode *dvp, *vp;
	int lockparent;
	int error;
	struct vnode **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	struct ucred *cred = cnp->cn_cred;
	int flags = cnp->cn_flags;
	struct thread *td = cnp->cn_thread;
	u_long vpid;			/* capability number of vnode */

	*vpp = NULL;
	dvp = ap->a_dvp;
	lockparent = flags & LOCKPARENT;

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);

	error = VOP_ACCESS(dvp, VEXEC, cred, td);
	if (error)
		return (error);

	error = cache_lookup(dvp, vpp, cnp);

#ifdef LOOKUP_SHARED
	if (!error) {
		/*
		 * We do this because the rest of the system now expects to
		 * get a shared lock, which is later upgraded if LOCKSHARED
		 * is not set.  We have so many cases here because of bugs
		 * that yield inconsistent lock states.  This all badly
		 * needs to be fixed.
		 */
		error = VOP_CACHEDLOOKUP(dvp, vpp, cnp);
		if (!error) {
			int flock;

			flock = VOP_ISLOCKED(*vpp, td);
			if (flock != LK_EXCLUSIVE) {
				if (flock == 0) {
					if ((flags & ISLASTCN) &&
					    (flags & LOCKSHARED))
						VOP_LOCK(*vpp, LK_SHARED, td);
					else
						VOP_LOCK(*vpp, LK_EXCLUSIVE, td);
				}
			} else if ((flags & ISLASTCN) && (flags & LOCKSHARED))
				VOP_LOCK(*vpp, LK_DOWNGRADE, td);
		}
		return (error);
	}
#else
	if (!error)
		return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
#endif

	if (error == ENOENT)
		return (error);

	vp = *vpp;
	vpid = vp->v_id;
	cnp->cn_flags &= ~PDIRUNLOCK;
	if (dvp == vp) {			/* lookup on "." */
		VREF(vp);
		error = 0;
	} else if (flags & ISDOTDOT) {
		VOP_UNLOCK(dvp, 0, td);
		cnp->cn_flags |= PDIRUNLOCK;
#ifdef LOOKUP_SHARED
		if ((flags & ISLASTCN) && (flags & LOCKSHARED))
			error = vget(vp, LK_SHARED, td);
		else
			error = vget(vp, LK_EXCLUSIVE, td);
#else
		error = vget(vp, LK_EXCLUSIVE, td);
#endif
		if (!error && lockparent && (flags & ISLASTCN)) {
			if ((error = vn_lock(dvp, LK_EXCLUSIVE, td)) == 0)
				cnp->cn_flags &= ~PDIRUNLOCK;
		}
	} else {
#ifdef LOOKUP_SHARED
		if ((flags & ISLASTCN) && (flags & LOCKSHARED))
			error = vget(vp, LK_SHARED, td);
		else
			error = vget(vp, LK_EXCLUSIVE, td);
#else
		error = vget(vp, LK_EXCLUSIVE, td);
#endif
		if (!lockparent || error || !(flags & ISLASTCN)) {
			VOP_UNLOCK(dvp, 0, td);
			cnp->cn_flags |= PDIRUNLOCK;
		}
	}
	/*
	 * Check that the capability number did not change
	 * while we were waiting for the lock.
	 */
	if (!error) {
		if (vpid == vp->v_id)
			return (0);
		vput(vp);
		if (lockparent && dvp != vp && (flags & ISLASTCN)) {
			VOP_UNLOCK(dvp, 0, td);
			cnp->cn_flags |= PDIRUNLOCK;
		}
	}
	if (cnp->cn_flags & PDIRUNLOCK) {
		error = vn_lock(dvp, LK_EXCLUSIVE, td);
		if (error)
			return (error);
		cnp->cn_flags &= ~PDIRUNLOCK;
	}
#ifdef LOOKUP_SHARED
	error = VOP_CACHEDLOOKUP(dvp, vpp, cnp);
	if (!error) {
		int flock = 0;

		flock = VOP_ISLOCKED(*vpp, td);
		if (flock != LK_EXCLUSIVE) {
			if (flock == 0) {
				if ((flags & ISLASTCN) && (flags & LOCKSHARED))
					VOP_LOCK(*vpp, LK_SHARED, td);
				else
					VOP_LOCK(*vpp, LK_EXCLUSIVE, td);
			}
		} else if ((flags & ISLASTCN) && (flags & LOCKSHARED))
			VOP_LOCK(*vpp, LK_DOWNGRADE, td);
	}
	return (error);
#else
	return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
#endif
}

#ifndef _SYS_SYSPROTO_H_
struct __getcwd_args {
	u_char	*buf;
	u_int	buflen;
};
#endif
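/*
 * Editor's sketch (not part of the original file): from userland the
 * raw syscall takes a buffer and its length and writes a NUL-terminated
 * path, roughly:
 *
 *	char buf[MAXPATHLEN];
 *	if (__getcwd(buf, sizeof(buf)) == 0)
 *		printf("%s\n", buf);
 *
 * libc's getcwd(3) is the usual wrapper around it.
 */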
/*
 * XXX All of these sysctls would probably be more productive dead.
 */
static int disablecwd;
SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0,
    "Disable the getcwd syscall");

/* Various statistics for the getcwd syscall */
static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls);
static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1);
static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2);
static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3);
static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4);
static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound);

/* Implementation of the getcwd syscall */
int
__getcwd(td, uap)
	struct thread *td;
	struct __getcwd_args *uap;
{

	return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen));
}

int
kern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg, u_int buflen)
{
	char *bp, *tmpbuf;
	int error, i, slash_prefixed;
	struct filedesc *fdp;
	struct namecache *ncp;
	struct vnode *vp;

	numcwdcalls++;
	if (disablecwd)
		return (ENODEV);
	if (buflen < 2)
		return (EINVAL);
	if (buflen > MAXPATHLEN)
		buflen = MAXPATHLEN;
	mtx_lock(&Giant);
	error = 0;
	tmpbuf = bp = malloc(buflen, M_TEMP, M_WAITOK);
	bp += buflen - 1;
	*bp = '\0';
	fdp = td->td_proc->p_fd;
	slash_prefixed = 0;
	FILEDESC_LOCK(fdp);
	for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) {
		if (vp->v_vflag & VV_ROOT) {
			if (vp->v_mount == NULL) {	/* forced unmount */
				error = EBADF;
				goto out;
			}
			vp = vp->v_mount->mnt_vnodecovered;
			continue;
		}
		if (vp->v_dd->v_id != vp->v_ddid) {
			numcwdfail1++;
			error = ENOTDIR;
			goto out;
		}
		CACHE_LOCK();
		ncp = TAILQ_FIRST(&vp->v_cache_dst);
		if (!ncp) {
			numcwdfail2++;
			CACHE_UNLOCK();
			error = ENOENT;
			goto out;
		}
		if (ncp->nc_dvp != vp->v_dd) {
			numcwdfail3++;
			CACHE_UNLOCK();
			error = EBADF;
			goto out;
		}
		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
			if (bp == tmpbuf) {
				numcwdfail4++;
				CACHE_UNLOCK();
				error = ENOMEM;
				goto out;
			}
			*--bp = ncp->nc_name[i];
		}
		if (bp == tmpbuf) {
			numcwdfail4++;
			CACHE_UNLOCK();
			error = ENOMEM;
			goto out;
		}
		*--bp = '/';
		slash_prefixed = 1;
		vp = vp->v_dd;
		CACHE_UNLOCK();
	}
	if (!slash_prefixed) {
		if (bp == tmpbuf) {
			numcwdfail4++;
			error = ENOMEM;
			goto out;
		}
		*--bp = '/';
	}
	FILEDESC_UNLOCK(fdp);
	mtx_unlock(&Giant);
	numcwdfound++;
	if (bufseg == UIO_SYSSPACE)
		bcopy(bp, buf, strlen(bp) + 1);
	else
		error = copyout(bp, buf, strlen(bp) + 1);
	free(tmpbuf, M_TEMP);
	return (error);
out:
	FILEDESC_UNLOCK(fdp);
	mtx_unlock(&Giant);
	free(tmpbuf, M_TEMP);
	return (error);
}
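/*
 * Editor's note (illustrative, not in the original): kern___getcwd()
 * builds the path right to left.  For a working directory of /usr/src,
 * the first pass copies "src" backwards into the tail of tmpbuf and
 * prepends '/', the second does the same for "usr", leaving bp
 * pointing at the finished string "/usr/src".
 */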
/*
 * Thus begins the fullpath magic.
 */

#undef STATNODE
#define STATNODE(name) \
	static u_int name; \
	SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "")

static int disablefullpath;
SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0,
    "Disable the vn_fullpath function");

STATNODE(numfullpathcalls);
STATNODE(numfullpathfail1);
STATNODE(numfullpathfail2);
STATNODE(numfullpathfail3);
STATNODE(numfullpathfail4);
STATNODE(numfullpathfound);

/*
 * Retrieve the full filesystem path that corresponds to a vnode from the
 * name cache (if available).
 */
int
vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf)
{
	char *bp, *buf;
	int i, slash_prefixed;
	struct filedesc *fdp;
	struct namecache *ncp;
	struct vnode *vp;

	numfullpathcalls++;
	if (disablefullpath)
		return (ENODEV);
	if (vn == NULL)
		return (EINVAL);
	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
	bp = buf + MAXPATHLEN - 1;
	*bp = '\0';
	fdp = td->td_proc->p_fd;
	slash_prefixed = 0;
	ASSERT_VOP_LOCKED(vn, "vn_fullpath");
	FILEDESC_LOCK(fdp);
	for (vp = vn; vp != fdp->fd_rdir && vp != rootvnode;) {
		if (vp->v_vflag & VV_ROOT) {
			if (vp->v_mount == NULL) {	/* forced unmount */
				FILEDESC_UNLOCK(fdp);
				free(buf, M_TEMP);
				return (EBADF);
			}
			vp = vp->v_mount->mnt_vnodecovered;
			continue;
		}
		if (vp != vn && vp->v_dd->v_id != vp->v_ddid) {
			FILEDESC_UNLOCK(fdp);
			free(buf, M_TEMP);
			numfullpathfail1++;
			return (ENOTDIR);
		}
		CACHE_LOCK();
		ncp = TAILQ_FIRST(&vp->v_cache_dst);
		if (!ncp) {
			numfullpathfail2++;
			CACHE_UNLOCK();
			FILEDESC_UNLOCK(fdp);
			free(buf, M_TEMP);
			return (ENOENT);
		}
		if (vp != vn && ncp->nc_dvp != vp->v_dd) {
			numfullpathfail3++;
			CACHE_UNLOCK();
			FILEDESC_UNLOCK(fdp);
			free(buf, M_TEMP);
			return (EBADF);
		}
		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
			if (bp == buf) {
				numfullpathfail4++;
				CACHE_UNLOCK();
				FILEDESC_UNLOCK(fdp);
				free(buf, M_TEMP);
				return (ENOMEM);
			}
			*--bp = ncp->nc_name[i];
		}
		if (bp == buf) {
			numfullpathfail4++;
			CACHE_UNLOCK();
			FILEDESC_UNLOCK(fdp);
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
		slash_prefixed = 1;
		vp = ncp->nc_dvp;
		CACHE_UNLOCK();
	}
	if (!slash_prefixed) {
		if (bp == buf) {
			numfullpathfail4++;
			FILEDESC_UNLOCK(fdp);
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
	}
	FILEDESC_UNLOCK(fdp);
	numfullpathfound++;
	*retbuf = bp;
	*freebuf = buf;
	return (0);
}
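/*
 * Editor's sketch (not part of the original file): vn_fullpath() hands
 * back two pointers; *retbuf points into the allocation that *freebuf
 * owns, so callers free freebuf, never retbuf.  Disabled; for
 * exposition only.
 */
#if 0
	char *fullpath, *freepath;

	/* vp must be locked, per the ASSERT_VOP_LOCKED above. */
	if (vn_fullpath(td, vp, &fullpath, &freepath) == 0) {
		printf("%s\n", fullpath);
		free(freepath, M_TEMP);
	}
#endif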