/*
 * Copyright (c) 1989, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Poul-Henning Kamp of the FreeBSD Project.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_cache.c	8.5 (Berkeley) 3/22/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/syscallsubr.h>
#include <sys/sysproto.h>
#include <sys/proc.h>
#include <sys/filedesc.h>
#include <sys/fnv_hash.h>

#include <vm/uma.h>

/*
 * This structure describes the elements in the cache of recent
 * names looked up by namei.
 */

struct namecache {
	LIST_ENTRY(namecache) nc_hash;	/* hash chain */
	LIST_ENTRY(namecache) nc_src;	/* source vnode list */
	TAILQ_ENTRY(namecache) nc_dst;	/* destination vnode list */
	struct vnode *nc_dvp;		/* vnode of parent of name */
	struct vnode *nc_vp;		/* vnode the name refers to */
	u_char	nc_flag;		/* flag bits */
	u_char	nc_nlen;		/* length of name */
	char	nc_name[0];		/* segment name */
};

/*
 * Name caching works as follows:
 *
 * Names found by directory scans are retained in a cache for future
 * reference.  The cache is managed LRU, so frequently used names will
 * hang around.  The cache is indexed by a hash value obtained from
 * (dvp, name), where dvp refers to the directory containing name.
 *
 * If it is a "negative" entry (i.e. for a name that is known NOT to
 * exist), the vnode pointer will be NULL.
 *
 * Upon reaching the last segment of a path, if the reference
 * is for DELETE, or NOCACHE is set (rewrite), and the
 * name is located in the cache, it will be dropped.
 */
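/*
 * For example, looking up "usr" in the root directory hashes the name
 * "usr" together with the root directory's v_id; a later lookup of the
 * same (directory, name) pair lands on the same hash chain and is
 * satisfied without calling into the filesystem.
 */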
/*
 * Structures associated with name caching.
 */
#define	NCHHASH(hash) \
	(&nchashtbl[(hash) & nchash])
static LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
static TAILQ_HEAD(, namecache) ncneg;	/* LRU list of negative entries */
static u_long	nchash;			/* size of hash table */
SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");
static u_long	ncnegfactor = 16;	/* ratio of negative entries */
SYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");
static u_long	numneg;			/* number of negative entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");
static u_long	numcache;		/* number of cache entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");
static u_long	numcachehv;		/* number of cache entries with vnodes held */
SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, "");
#if 0
static u_long	numcachepl;		/* number of cache purge for leaf entries */
SYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, "");
#endif
struct	nchstats nchstats;		/* cache effectiveness statistics */

struct mtx cache_lock;
MTX_SYSINIT(vfscache, &cache_lock, "Name Cache", MTX_DEF);

#define	CACHE_LOCK()	mtx_lock(&cache_lock)
#define	CACHE_UNLOCK()	mtx_unlock(&cache_lock)

/*
 * UMA zones for the VFS cache.
 *
 * The small cache is used for entries with short names, which are the
 * most common.  The large cache is used for entries which are too big to
 * fit in the small cache.
 */
static uma_zone_t cache_zone_small;
static uma_zone_t cache_zone_large;

#define	CACHE_PATH_CUTOFF	32
#define	CACHE_ZONE_SMALL	(sizeof(struct namecache) + CACHE_PATH_CUTOFF)
#define	CACHE_ZONE_LARGE	(sizeof(struct namecache) + NAME_MAX)

#define	cache_alloc(len)	uma_zalloc(((len) <= CACHE_PATH_CUTOFF) ? \
	cache_zone_small : cache_zone_large, M_WAITOK)
#define	cache_free(ncp)		do { \
	if ((ncp) != NULL) \
		uma_zfree(((ncp)->nc_nlen <= CACHE_PATH_CUTOFF) ? \
		    cache_zone_small : cache_zone_large, (ncp)); \
} while (0)
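/*
 * Example: a 5-byte name such as "hosts" comes from cache_zone_small,
 * which has room for names up to CACHE_PATH_CUTOFF bytes; anything
 * longer falls through to cache_zone_large, which can hold any name
 * up to NAME_MAX.
 */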
static int	doingcache = 1;		/* 1 => enable the cache */
SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "");

/* Export size information to userland */
SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), "");
SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), "");

/*
 * The new name cache statistics
 */
SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics");
#define	STATNODE(mode, name, var) \
	SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
STATNODE(CTLFLAG_RD, numneg, &numneg);
STATNODE(CTLFLAG_RD, numcache, &numcache);
static u_long	numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls);
static u_long	dothits; STATNODE(CTLFLAG_RD, dothits, &dothits);
static u_long	dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits);
static u_long	numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks);
static u_long	nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss);
static u_long	nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap);
static u_long	numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps);
static u_long	numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits);
static u_long	numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps);
static u_long	numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);

SYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD, &nchstats,
	sizeof(nchstats), "LU", "VFS cache effectiveness statistics");

static void cache_zap(struct namecache *ncp, int locked);

static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");

/*
 * Flags in namecache.nc_flag
 */
#define	NCF_WHITE	1

/*
 * Grab an atomic snapshot of the name cache hash chain lengths
 */
SYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats");

static int
sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
{
	int error;
	struct nchashhead *ncpp;
	struct namecache *ncp;
	int n_nchash;
	int count;

	n_nchash = nchash + 1;	/* nchash is max index, not count */
	if (!req->oldptr)
		return SYSCTL_OUT(req, 0, n_nchash * sizeof(int));

	/* Scan hash tables for applicable entries */
	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
		count = 0;
		LIST_FOREACH(ncp, ncpp, nc_hash) {
			count++;
		}
		error = SYSCTL_OUT(req, &count, sizeof(count));
		if (error)
			return (error);
	}
	return (0);
}
SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_debug_hashstat_rawnchash, "S,int", "nchash chain lengths");
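/*
 * A minimal userland sketch (hypothetical test program, not part of
 * this file) for reading the raw chain lengths exported above:
 *
 *	int lens[1024];
 *	size_t len = sizeof(lens);
 *	if (sysctlbyname("debug.hashstat.rawnchash", lens, &len, NULL, 0) == 0)
 *		...len / sizeof(int) chain lengths are now in lens...
 */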
static int
sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
{
	int error;
	struct nchashhead *ncpp;
	struct namecache *ncp;
	int n_nchash;
	int count, maxlength, used, pct;

	if (!req->oldptr)
		return SYSCTL_OUT(req, 0, 4 * sizeof(int));

	n_nchash = nchash + 1;	/* nchash is max index, not count */
	used = 0;
	maxlength = 0;

	/* Scan hash tables for applicable entries */
	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
		count = 0;
		LIST_FOREACH(ncp, ncpp, nc_hash) {
			count++;
		}
		if (count)
			used++;
		if (maxlength < count)
			maxlength = count;
	}
	n_nchash = nchash + 1;
	pct = (used * 100 * 100) / n_nchash;	/* percentage scaled by 100 */
	error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash));
	if (error)
		return (error);
	error = SYSCTL_OUT(req, &used, sizeof(used));
	if (error)
		return (error);
	error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength));
	if (error)
		return (error);
	error = SYSCTL_OUT(req, &pct, sizeof(pct));
	if (error)
		return (error);
	return (0);
}
SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_debug_hashstat_nchash, "I", "nchash chain lengths");

/*
 * cache_zap():
 *
 *   Removes a namecache entry from the cache, whether it contains an
 *   actual vnode pointer or is just a negative cache entry.
 */
static void
cache_zap(struct namecache *ncp, int locked)
{
	struct vnode *vp;

	vp = NULL;
	if (!locked)
		CACHE_LOCK();
	LIST_REMOVE(ncp, nc_hash);
	LIST_REMOVE(ncp, nc_src);
	if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
		vp = ncp->nc_dvp;
		numcachehv--;
	}
	if (ncp->nc_vp) {
		TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
	} else {
		TAILQ_REMOVE(&ncneg, ncp, nc_dst);
		numneg--;
	}
	numcache--;
	CACHE_UNLOCK();
	cache_free(ncp);
	if (vp)
		vdrop(vp);
	if (locked)
		CACHE_LOCK();
}

/*
 * cache_leaf_test()
 *
 *   Test whether this (directory) vnode's namei cache entry contains
 *   subdirectories or not.  Used to determine whether the directory is
 *   a leaf in the namei cache or not.  Note: the directory may still
 *   contain files in the namei cache.
 *
 *   Returns 0 if the directory is a leaf, -1 if it isn't.
 */
int
cache_leaf_test(struct vnode *vp)
{
	struct namecache *ncpc;
	int leaf;

	leaf = 0;
	CACHE_LOCK();
	LIST_FOREACH(ncpc, &vp->v_cache_src, nc_src) {
		if (ncpc->nc_vp != NULL && ncpc->nc_vp->v_type == VDIR) {
			leaf = -1;
			break;
		}
	}
	CACHE_UNLOCK();
	return (leaf);
}

/*
 * Lookup an entry in the cache
 *
 * Lookup is called with dvp pointing to the directory to search,
 * cnp pointing to the name of the entry being sought.  If the lookup
 * succeeds, the vnode is returned in *vpp, and a status of -1 is
 * returned.  If the lookup determines that the name does not exist
 * (negative caching), a status of ENOENT is returned.  If the lookup
 * fails, a status of zero is returned.
 */
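/*
 * A caller-side sketch of these conventions (vfs_cache_lookup() below
 * is the canonical consumer):
 *
 *	error = cache_lookup(dvp, &vp, cnp);
 *	if (error == -1)		...hit, vp is valid...
 *	else if (error == ENOENT)	...negative hit, name known absent...
 *	else				...miss, ask the filesystem...
 */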
int
cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
{
	struct namecache *ncp;
	u_int32_t hash;

	if (!doingcache) {
		cnp->cn_flags &= ~MAKEENTRY;
		return (0);
	}

	CACHE_LOCK();
	numcalls++;

	if (cnp->cn_nameptr[0] == '.') {
		if (cnp->cn_namelen == 1) {
			*vpp = dvp;
			dothits++;
			CACHE_UNLOCK();
			return (-1);
		}
		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
			dotdothits++;
			if (dvp->v_dd->v_id != dvp->v_ddid ||
			    (cnp->cn_flags & MAKEENTRY) == 0) {
				dvp->v_ddid = 0;
				CACHE_UNLOCK();
				return (0);
			}
			*vpp = dvp->v_dd;
			CACHE_UNLOCK();
			return (-1);
		}
	}

	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
		numchecks++;
		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
			break;
	}

	/* We failed to find an entry */
	if (ncp == NULL) {
		if ((cnp->cn_flags & MAKEENTRY) == 0) {
			nummisszap++;
		} else {
			nummiss++;
		}
		nchstats.ncs_miss++;
		CACHE_UNLOCK();
		return (0);
	}

	/* We don't want to have an entry, so dump it */
	if ((cnp->cn_flags & MAKEENTRY) == 0) {
		numposzaps++;
		nchstats.ncs_badhits++;
		CACHE_UNLOCK();
		cache_zap(ncp, 0);
		return (0);
	}

	/* We found a "positive" match, return the vnode */
	if (ncp->nc_vp) {
		numposhits++;
		nchstats.ncs_goodhits++;
		*vpp = ncp->nc_vp;
		CACHE_UNLOCK();
		return (-1);
	}

	/* We found a negative match, and want to create it, so purge */
	if (cnp->cn_nameiop == CREATE) {
		numnegzaps++;
		nchstats.ncs_badhits++;
		CACHE_UNLOCK();
		cache_zap(ncp, 0);
		return (0);
	}

	numneghits++;
	/*
	 * We found a "negative" match, so we shift it to the end of
	 * the "negative" cache entries queue to satisfy LRU.  Also,
	 * check to see if the entry is a whiteout; indicate this to
	 * the componentname, if so.
	 */
	TAILQ_REMOVE(&ncneg, ncp, nc_dst);
	TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
	nchstats.ncs_neghits++;
	if (ncp->nc_flag & NCF_WHITE)
		cnp->cn_flags |= ISWHITEOUT;
	CACHE_UNLOCK();
	return (ENOENT);
}

/*
 * Add an entry to the cache.
 */
void
cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
{
	struct namecache *ncp;
	struct nchashhead *ncpp;
	u_int32_t hash;
	int hold;
	int zap;
	int len;

	if (!doingcache)
		return;

	if (cnp->cn_nameptr[0] == '.') {
		if (cnp->cn_namelen == 1) {
			return;
		}
		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
			if (vp) {
				dvp->v_dd = vp;
				dvp->v_ddid = vp->v_id;
			} else {
				dvp->v_dd = dvp;
				dvp->v_ddid = 0;
			}
			return;
		}
	}

	hold = 0;
	zap = 0;
	ncp = cache_alloc(cnp->cn_namelen);
	CACHE_LOCK();
	numcache++;
	if (!vp) {
		numneg++;
		ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0;
	} else if (vp->v_type == VDIR) {
		vp->v_dd = dvp;
		vp->v_ddid = dvp->v_id;
	}

	/*
	 * Set the rest of the namecache entry elements, calculate its
	 * hash key and insert it into the appropriate chain within
	 * the cache entries table.
	 */
	ncp->nc_vp = vp;
	ncp->nc_dvp = dvp;
	len = ncp->nc_nlen = cnp->cn_namelen;
	hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
	bcopy(cnp->cn_nameptr, ncp->nc_name, len);
	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
	ncpp = NCHHASH(hash);
	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
	if (LIST_EMPTY(&dvp->v_cache_src)) {
		hold = 1;
		numcachehv++;
	}
	LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
	/*
	 * If the entry is "negative", we place it into the
	 * "negative" cache queue, otherwise, we place it into the
	 * destination vnode's cache entries queue.
	 */
	if (vp) {
		TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
	} else {
		TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
	}
	if (numneg * ncnegfactor > numcache) {
		ncp = TAILQ_FIRST(&ncneg);
		zap = 1;
	}
	CACHE_UNLOCK();
	if (hold)
		vhold(dvp);
	if (zap)
		cache_zap(ncp, 0);
}
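/*
 * Note that cache_enter() also doubles as the trim point for negative
 * entries: whenever negatives exceed 1/ncnegfactor (by default 1/16)
 * of the whole cache, the oldest negative entry is zapped on the way
 * out, so negative entries cannot crowd out positive ones.
 */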
/*
 * Name cache initialization, from vfs_init() when we are booting
 */
static void
nchinit(void *dummy __unused)
{

	TAILQ_INIT(&ncneg);

	cache_zone_small = uma_zcreate("S VFS Cache", CACHE_ZONE_SMALL, NULL,
	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
	cache_zone_large = uma_zcreate("L VFS Cache", CACHE_ZONE_LARGE, NULL,
	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);

	nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
}
SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL)

/*
 * Invalidate all entries to a particular vnode.
 *
 * Remove all entries in the namecache relating to this vnode and
 * change the v_id.  We take the v_id from a global counter, since
 * it becomes a handy sequence number in crash-dumps that way.
 * No valid vnode will ever have (v_id == 0).
 *
 * XXX: Only time and the size of v_id prevents this from failing:
 * XXX: In theory we should hunt down all (struct vnode*, v_id)
 * XXX: soft references and nuke them, at least on the global
 * XXX: v_id wraparound.  The period of resistance can be extended
 * XXX: by incrementing each vnode's v_id individually instead of
 * XXX: using the global v_id.
 */

/*
 * XXX This is sometimes called when a vnode may still be re-used, in which
 * case v_dd may be invalid.  Need to look this up.
 */
void
cache_purge(struct vnode *vp)
{
	static u_long nextid;

	CACHE_LOCK();
	while (!LIST_EMPTY(&vp->v_cache_src))
		cache_zap(LIST_FIRST(&vp->v_cache_src), 1);
	while (!TAILQ_EMPTY(&vp->v_cache_dst))
		cache_zap(TAILQ_FIRST(&vp->v_cache_dst), 1);

	do
		nextid++;
	while (nextid == vp->v_id || !nextid);
	vp->v_id = nextid;
	vp->v_dd = vp;
	vp->v_ddid = 0;
	CACHE_UNLOCK();
}
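/*
 * The do/while loop above guarantees that the vnode neither keeps its
 * old v_id nor ends up with id 0: if the global counter wraps onto the
 * vnode's current v_id (or onto 0), the loop simply advances one more
 * step.  Any stale (vnode, v_id) pair cached elsewhere, such as the
 * v_dd/v_ddid parent hints, is thereby invalidated.
 */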
/*
 * Flush all entries referencing a particular filesystem.
 *
 * Since we need to check it anyway, we will flush all the invalid
 * entries at the same time.
 */
void
cache_purgevfs(struct mount *mp)
{
	struct nchashhead *ncpp;
	struct namecache *ncp, *nnp;
	struct nchashhead mplist;

	LIST_INIT(&mplist);
	ncp = NULL;

	/* Scan hash tables for applicable entries */
	CACHE_LOCK();
	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
		for (ncp = LIST_FIRST(ncpp); ncp != NULL; ncp = nnp) {
			nnp = LIST_NEXT(ncp, nc_hash);
			if (ncp->nc_dvp->v_mount == mp) {
				/* Collect the entry on a private list... */
				LIST_REMOVE(ncp, nc_hash);
				LIST_INSERT_HEAD(&mplist, ncp, nc_hash);
			}
		}
	}
	CACHE_UNLOCK();
	/* ...and zap the collected entries without holding the cache lock. */
	while (!LIST_EMPTY(&mplist))
		cache_zap(LIST_FIRST(&mplist), 0);
}

/*
 * Perform canonical checks and cache lookup and pass on to filesystem
 * through the vop_cachedlookup only if needed.
 */
int
vfs_cache_lookup(struct vop_lookup_args *ap)
{
	/*
	 * struct vop_lookup_args {
	 *	struct vnode *a_dvp;
	 *	struct vnode **a_vpp;
	 *	struct componentname *a_cnp;
	 * };
	 */
	struct vnode *dvp, *vp;
	int lockparent;
	int error;
	struct vnode **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	struct ucred *cred = cnp->cn_cred;
	int flags = cnp->cn_flags;
	struct thread *td = cnp->cn_thread;
	u_long vpid;	/* capability number of vnode */

	*vpp = NULL;
	dvp = ap->a_dvp;
	lockparent = flags & LOCKPARENT;

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);

	error = VOP_ACCESS(dvp, VEXEC, cred, td);
	if (error)
		return (error);

	error = cache_lookup(dvp, vpp, cnp);

#ifdef LOOKUP_SHARED
	if (!error) {
		/*
		 * We do this because the rest of the system now expects to
		 * get a shared lock, which is later upgraded if LOCKSHARED
		 * is not set.  We have so many cases here because of bugs
		 * that yield inconsistent lock states.  This all badly
		 * needs to be fixed.
		 */
		error = VOP_CACHEDLOOKUP(dvp, vpp, cnp);
		if (!error) {
			int flock;

			flock = VOP_ISLOCKED(*vpp, td);
			if (flock != LK_EXCLUSIVE) {
				if (flock == 0) {
					if ((flags & ISLASTCN) &&
					    (flags & LOCKSHARED))
						VOP_LOCK(*vpp, LK_SHARED, td);
					else
						VOP_LOCK(*vpp, LK_EXCLUSIVE, td);
				}
			} else if ((flags & ISLASTCN) && (flags & LOCKSHARED))
				VOP_LOCK(*vpp, LK_DOWNGRADE, td);
		}
		return (error);
	}
#else
	if (!error)
		return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
#endif

	if (error == ENOENT)
		return (error);

	vp = *vpp;
	vpid = vp->v_id;
	cnp->cn_flags &= ~PDIRUNLOCK;
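	/*
	 * Three cases follow: a hit on "." just takes another reference
	 * on dvp; a hit on ".." unlocks the parent before locking the
	 * child, since the target is an ancestor and locking it while
	 * still holding dvp could deadlock against a top-down lookup;
	 * anything else locks the child directly.
	 */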
	if (dvp == vp) {	/* lookup on "." */
		VREF(vp);
		error = 0;
	} else if (flags & ISDOTDOT) {
		VOP_UNLOCK(dvp, 0, td);
		cnp->cn_flags |= PDIRUNLOCK;
#ifdef LOOKUP_SHARED
		if ((flags & ISLASTCN) && (flags & LOCKSHARED))
			error = vget(vp, LK_SHARED, td);
		else
			error = vget(vp, LK_EXCLUSIVE, td);
#else
		error = vget(vp, LK_EXCLUSIVE, td);
#endif
		if (!error && lockparent && (flags & ISLASTCN)) {
			if ((error = vn_lock(dvp, LK_EXCLUSIVE, td)) == 0)
				cnp->cn_flags &= ~PDIRUNLOCK;
		}
	} else {
#ifdef LOOKUP_SHARED
		if ((flags & ISLASTCN) && (flags & LOCKSHARED))
			error = vget(vp, LK_SHARED, td);
		else
			error = vget(vp, LK_EXCLUSIVE, td);
#else
		error = vget(vp, LK_EXCLUSIVE, td);
#endif
		if (!lockparent || error || !(flags & ISLASTCN)) {
			VOP_UNLOCK(dvp, 0, td);
			cnp->cn_flags |= PDIRUNLOCK;
		}
	}
	/*
	 * Check that the capability number did not change
	 * while we were waiting for the lock.
	 */
	if (!error) {
		if (vpid == vp->v_id)
			return (0);
		vput(vp);
		if (lockparent && dvp != vp && (flags & ISLASTCN)) {
			VOP_UNLOCK(dvp, 0, td);
			cnp->cn_flags |= PDIRUNLOCK;
		}
	}
	if (cnp->cn_flags & PDIRUNLOCK) {
		error = vn_lock(dvp, LK_EXCLUSIVE, td);
		if (error)
			return (error);
		cnp->cn_flags &= ~PDIRUNLOCK;
	}
#ifdef LOOKUP_SHARED
	error = VOP_CACHEDLOOKUP(dvp, vpp, cnp);
	if (!error) {
		int flock;

		flock = VOP_ISLOCKED(*vpp, td);
		if (flock != LK_EXCLUSIVE) {
			if (flock == 0) {
				if ((flags & ISLASTCN) && (flags & LOCKSHARED))
					VOP_LOCK(*vpp, LK_SHARED, td);
				else
					VOP_LOCK(*vpp, LK_EXCLUSIVE, td);
			}
		} else if ((flags & ISLASTCN) && (flags & LOCKSHARED))
			VOP_LOCK(*vpp, LK_DOWNGRADE, td);
	}

	return (error);
#else
	return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
#endif
}
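/*
 * Filesystems opt into this path by pointing their vop_lookup entry at
 * vfs_cache_lookup() and supplying the real directory scan as
 * vop_cachedlookup.  A sketch of such a vnodeop table entry pair
 * (illustrative only, with ufs_lookup as the assumed backend; not
 * declared in this file):
 *
 *	{ &vop_lookup_desc,		(vop_t *) vfs_cache_lookup },
 *	{ &vop_cachedlookup_desc,	(vop_t *) ufs_lookup },
 */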
#ifndef _SYS_SYSPROTO_H_
struct __getcwd_args {
	u_char	*buf;
	u_int	buflen;
};
#endif

/*
 * XXX All of these sysctls would probably be more productive dead.
 */
static int disablecwd;
SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0,
	"Disable the getcwd syscall");

/* Various statistics for the getcwd syscall */
static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls);
static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1);
static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2);
static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3);
static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4);
static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound);

/* Implementation of the getcwd syscall */
int
__getcwd(struct thread *td, struct __getcwd_args *uap)
{

	return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen));
}

int
kern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg, u_int buflen)
{
	char *bp, *tmpbuf;
	int error, i, slash_prefixed;
	struct filedesc *fdp;
	struct namecache *ncp;
	struct vnode *vp;

	numcwdcalls++;
	if (disablecwd)
		return (ENODEV);
	if (buflen < 2)
		return (EINVAL);
	if (buflen > MAXPATHLEN)
		buflen = MAXPATHLEN;
	error = 0;
	tmpbuf = bp = malloc(buflen, M_TEMP, M_WAITOK);
	bp += buflen - 1;
	*bp = '\0';
	fdp = td->td_proc->p_fd;
	slash_prefixed = 0;
	FILEDESC_LOCK(fdp);
	for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) {
		if (vp->v_vflag & VV_ROOT) {
			if (vp->v_mount == NULL) {	/* forced unmount */
				FILEDESC_UNLOCK(fdp);
				free(tmpbuf, M_TEMP);
				return (EBADF);
			}
			/* Cross the mount point to the covered vnode. */
			vp = vp->v_mount->mnt_vnodecovered;
			continue;
		}
		if (vp->v_dd->v_id != vp->v_ddid) {
			FILEDESC_UNLOCK(fdp);
			numcwdfail1++;
			free(tmpbuf, M_TEMP);
			return (ENOTDIR);
		}
		CACHE_LOCK();
		ncp = TAILQ_FIRST(&vp->v_cache_dst);
		if (!ncp) {
			numcwdfail2++;
			CACHE_UNLOCK();
			FILEDESC_UNLOCK(fdp);
			free(tmpbuf, M_TEMP);
			return (ENOENT);
		}
		if (ncp->nc_dvp != vp->v_dd) {
			numcwdfail3++;
			CACHE_UNLOCK();
			FILEDESC_UNLOCK(fdp);
			free(tmpbuf, M_TEMP);
			return (EBADF);
		}
		/* Prepend the name of this component. */
		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
			if (bp == tmpbuf) {
				numcwdfail4++;
				CACHE_UNLOCK();
				FILEDESC_UNLOCK(fdp);
				free(tmpbuf, M_TEMP);
				return (ENOMEM);
			}
			*--bp = ncp->nc_name[i];
		}
		if (bp == tmpbuf) {
			numcwdfail4++;
			CACHE_UNLOCK();
			FILEDESC_UNLOCK(fdp);
			free(tmpbuf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
		slash_prefixed = 1;
		vp = vp->v_dd;
		CACHE_UNLOCK();
	}
	FILEDESC_UNLOCK(fdp);
	if (!slash_prefixed) {
		if (bp == tmpbuf) {
			numcwdfail4++;
			free(tmpbuf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
	}
	numcwdfound++;
	if (bufseg == UIO_SYSSPACE)
		bcopy(bp, buf, strlen(bp) + 1);
	else
		error = copyout(bp, buf, strlen(bp) + 1);
	free(tmpbuf, M_TEMP);
	return (error);
}
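/*
 * kern___getcwd() thus builds the path right-to-left: starting at the
 * current directory, each iteration prepends one component name taken
 * from the vnode's name cache entry and steps up through v_dd until
 * the process root (or the system root vnode) is reached.  Userland
 * normally gets here via getcwd(3), whose fast path issues this
 * syscall directly.
 */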
/*
 * Thus begins the fullpath magic.
 */

#undef STATNODE
#define	STATNODE(name)							\
	static u_int name;						\
	SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "")

static int disablefullpath;
SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0,
	"Disable the vn_fullpath function");

STATNODE(numfullpathcalls);
STATNODE(numfullpathfail1);
STATNODE(numfullpathfail2);
STATNODE(numfullpathfail3);
STATNODE(numfullpathfail4);
STATNODE(numfullpathfound);

/*
 * Retrieve the full filesystem path that corresponds to a vnode from the
 * name cache (if available).
 */
int
vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf)
{
	char *bp, *buf;
	int i, slash_prefixed;
	struct filedesc *fdp;
	struct namecache *ncp;
	struct vnode *vp;

	numfullpathcalls++;
	if (disablefullpath)
		return (ENODEV);
	if (vn == NULL)
		return (EINVAL);
	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
	bp = buf + MAXPATHLEN - 1;
	*bp = '\0';
	fdp = td->td_proc->p_fd;
	slash_prefixed = 0;
	ASSERT_VOP_LOCKED(vn, "vn_fullpath");
	FILEDESC_LOCK(fdp);
	for (vp = vn; vp != fdp->fd_rdir && vp != rootvnode;) {
		if (vp->v_vflag & VV_ROOT) {
			if (vp->v_mount == NULL) {	/* forced unmount */
				FILEDESC_UNLOCK(fdp);
				free(buf, M_TEMP);
				return (EBADF);
			}
			vp = vp->v_mount->mnt_vnodecovered;
			continue;
		}
		if (vp != vn && vp->v_dd->v_id != vp->v_ddid) {
			FILEDESC_UNLOCK(fdp);
			free(buf, M_TEMP);
			numfullpathfail1++;
			return (ENOTDIR);
		}
		CACHE_LOCK();
		ncp = TAILQ_FIRST(&vp->v_cache_dst);
		if (!ncp) {
			numfullpathfail2++;
			CACHE_UNLOCK();
			FILEDESC_UNLOCK(fdp);
			free(buf, M_TEMP);
			return (ENOENT);
		}
		if (vp != vn && ncp->nc_dvp != vp->v_dd) {
			numfullpathfail3++;
			CACHE_UNLOCK();
			FILEDESC_UNLOCK(fdp);
			free(buf, M_TEMP);
			return (EBADF);
		}
		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
			if (bp == buf) {
				numfullpathfail4++;
				CACHE_UNLOCK();
				FILEDESC_UNLOCK(fdp);
				free(buf, M_TEMP);
				return (ENOMEM);
			}
			*--bp = ncp->nc_name[i];
		}
		if (bp == buf) {
			numfullpathfail4++;
			CACHE_UNLOCK();
			FILEDESC_UNLOCK(fdp);
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
		slash_prefixed = 1;
		vp = ncp->nc_dvp;
		CACHE_UNLOCK();
	}
	if (!slash_prefixed) {
		if (bp == buf) {
			numfullpathfail4++;
			FILEDESC_UNLOCK(fdp);
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
	}
	FILEDESC_UNLOCK(fdp);
	numfullpathfound++;
	*retbuf = bp;
	*freebuf = buf;
	return (0);
}
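/*
 * A typical calling pattern (sketch): the path comes back in *retbuf,
 * which points into the buffer returned in *freebuf, and the caller
 * must free the latter:
 *
 *	char *fullpath, *freepath;
 *
 *	if (vn_fullpath(td, vp, &fullpath, &freepath) == 0) {
 *		...use fullpath...
 *		free(freepath, M_TEMP);
 *	}
 */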