1 /* 2 * Copyright (c) 1989, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Poul-Henning Kamp of the FreeBSD Project. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 37 * $FreeBSD$ 38 */ 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/kernel.h> 43 #include <sys/lock.h> 44 #include <sys/mutex.h> 45 #include <sys/sysctl.h> 46 #include <sys/mount.h> 47 #include <sys/vnode.h> 48 #include <sys/namei.h> 49 #include <sys/malloc.h> 50 #include <sys/syscallsubr.h> 51 #include <sys/sysproto.h> 52 #include <sys/proc.h> 53 #include <sys/filedesc.h> 54 #include <sys/fnv_hash.h> 55 56 /* 57 * This structure describes the elements in the cache of recent 58 * names looked up by namei. 59 */ 60 61 struct namecache { 62 LIST_ENTRY(namecache) nc_hash; /* hash chain */ 63 LIST_ENTRY(namecache) nc_src; /* source vnode list */ 64 TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */ 65 struct vnode *nc_dvp; /* vnode of parent of name */ 66 struct vnode *nc_vp; /* vnode the name refers to */ 67 u_char nc_flag; /* flag bits */ 68 u_char nc_nlen; /* length of name */ 69 char nc_name[0]; /* segment name */ 70 }; 71 72 /* 73 * Name caching works as follows: 74 * 75 * Names found by directory scans are retained in a cache 76 * for future reference. It is managed LRU, so frequently 77 * used names will hang around. Cache is indexed by hash value 78 * obtained from (vp, name) where vp refers to the directory 79 * containing name. 80 * 81 * If it is a "negative" entry, (i.e. for a name that is known NOT to 82 * exist) the vnode pointer will be NULL. 83 * 84 * Upon reaching the last segment of a path, if the reference 85 * is for DELETE, or NOCACHE is set (rewrite), and the 86 * name is located in the cache, it will be dropped. 87 */ 88 89 /* 90 * Structures associated with name cacheing. 91 */ 92 #define NCHHASH(hash) \ 93 (&nchashtbl[(hash) & nchash]) 94 static LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */ 95 static TAILQ_HEAD(, namecache) ncneg; /* Hash Table */ 96 static u_long nchash; /* size of hash table */ 97 SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, ""); 98 static u_long ncnegfactor = 16; /* ratio of negative entries */ 99 SYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, ""); 100 static u_long numneg; /* number of cache entries allocated */ 101 SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, ""); 102 static u_long numcache; /* number of cache entries allocated */ 103 SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, ""); 104 static u_long numcachehv; /* number of cache entries with vnodes held */ 105 SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, ""); 106 #if 0 107 static u_long numcachepl; /* number of cache purge for leaf entries */ 108 SYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, ""); 109 #endif 110 struct nchstats nchstats; /* cache effectiveness statistics */ 111 112 static int doingcache = 1; /* 1 => enable the cache */ 113 SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, ""); 114 115 /* Export size information to userland */ 116 SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), ""); 117 SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), ""); 118 119 /* 120 * The new name cache statistics 121 */ 122 SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics"); 123 #define STATNODE(mode, name, var) \ 124 SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, ""); 125 STATNODE(CTLFLAG_RD, numneg, &numneg); 126 STATNODE(CTLFLAG_RD, numcache, &numcache); 127 static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls); 128 static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits); 129 static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits); 130 static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks); 131 static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss); 132 static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap); 133 static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps); 134 static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits); 135 static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps); 136 static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits); 137 138 SYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD, &nchstats, 139 sizeof(nchstats), "LU", "VFS cache effectiveness statistics"); 140 141 142 143 static void cache_zap(struct namecache *ncp); 144 145 static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries"); 146 147 /* 148 * Flags in namecache.nc_flag 149 */ 150 #define NCF_WHITE 1 151 152 /* 153 * Grab an atomic snapshot of the name cache hash chain lengths 154 */ 155 SYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats"); 156 157 static int 158 sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS) 159 { 160 int error; 161 struct nchashhead *ncpp; 162 struct namecache *ncp; 163 int n_nchash; 164 int count; 165 166 n_nchash = nchash + 1; /* nchash is max index, not count */ 167 if (!req->oldptr) 168 return SYSCTL_OUT(req, 0, n_nchash * sizeof(int)); 169 170 /* Scan hash tables for applicable entries */ 171 for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { 172 count = 0; 173 LIST_FOREACH(ncp, ncpp, nc_hash) { 174 count++; 175 } 176 error = SYSCTL_OUT(req, &count, sizeof(count)); 177 if (error) 178 return (error); 179 } 180 return (0); 181 } 182 SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD, 183 0, 0, sysctl_debug_hashstat_rawnchash, "S,int", "nchash chain lengths"); 184 185 static int 186 sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS) 187 { 188 int error; 189 struct nchashhead *ncpp; 190 struct namecache *ncp; 191 int n_nchash; 192 int count, maxlength, used, pct; 193 194 if (!req->oldptr) 195 return SYSCTL_OUT(req, 0, 4 * sizeof(int)); 196 197 n_nchash = nchash + 1; /* nchash is max index, not count */ 198 used = 0; 199 maxlength = 0; 200 201 /* Scan hash tables for applicable entries */ 202 for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { 203 count = 0; 204 LIST_FOREACH(ncp, ncpp, nc_hash) { 205 count++; 206 } 207 if (count) 208 used++; 209 if (maxlength < count) 210 maxlength = count; 211 } 212 n_nchash = nchash + 1; 213 pct = (used * 100 * 100) / n_nchash; 214 error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash)); 215 if (error) 216 return (error); 217 error = SYSCTL_OUT(req, &used, sizeof(used)); 218 if (error) 219 return (error); 220 error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength)); 221 if (error) 222 return (error); 223 error = SYSCTL_OUT(req, &pct, sizeof(pct)); 224 if (error) 225 return (error); 226 return (0); 227 } 228 SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD, 229 0, 0, sysctl_debug_hashstat_nchash, "I", "nchash chain lengths"); 230 231 /* 232 * cache_zap(): 233 * 234 * Removes a namecache entry from cache, whether it contains an actual 235 * pointer to a vnode or if it is just a negative cache entry. 236 */ 237 static void 238 cache_zap(ncp) 239 struct namecache *ncp; 240 { 241 LIST_REMOVE(ncp, nc_hash); 242 LIST_REMOVE(ncp, nc_src); 243 if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) { 244 vdrop(ncp->nc_dvp); 245 numcachehv--; 246 } 247 if (ncp->nc_vp) { 248 TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst); 249 } else { 250 TAILQ_REMOVE(&ncneg, ncp, nc_dst); 251 numneg--; 252 } 253 numcache--; 254 free(ncp, M_VFSCACHE); 255 } 256 257 /* 258 * cache_leaf_test() 259 * 260 * Test whether this (directory) vnode's namei cache entry contains 261 * subdirectories or not. Used to determine whether the directory is 262 * a leaf in the namei cache or not. Note: the directory may still 263 * contain files in the namei cache. 264 * 265 * Returns 0 if the directory is a leaf, -1 if it isn't. 266 */ 267 int 268 cache_leaf_test(struct vnode *vp) 269 { 270 struct namecache *ncpc; 271 272 for (ncpc = LIST_FIRST(&vp->v_cache_src); 273 ncpc != NULL; 274 ncpc = LIST_NEXT(ncpc, nc_src) 275 ) { 276 if (ncpc->nc_vp != NULL && ncpc->nc_vp->v_type == VDIR) 277 return(-1); 278 } 279 return(0); 280 } 281 282 /* 283 * Lookup an entry in the cache 284 * 285 * Lookup is called with dvp pointing to the directory to search, 286 * cnp pointing to the name of the entry being sought. If the lookup 287 * succeeds, the vnode is returned in *vpp, and a status of -1 is 288 * returned. If the lookup determines that the name does not exist 289 * (negative cacheing), a status of ENOENT is returned. If the lookup 290 * fails, a status of zero is returned. 291 */ 292 293 int 294 cache_lookup(dvp, vpp, cnp) 295 struct vnode *dvp; 296 struct vnode **vpp; 297 struct componentname *cnp; 298 { 299 struct namecache *ncp; 300 u_int32_t hash; 301 302 if (!doingcache) { 303 cnp->cn_flags &= ~MAKEENTRY; 304 return (0); 305 } 306 307 numcalls++; 308 309 if (cnp->cn_nameptr[0] == '.') { 310 if (cnp->cn_namelen == 1) { 311 *vpp = dvp; 312 dothits++; 313 return (-1); 314 } 315 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 316 dotdothits++; 317 if (dvp->v_dd->v_id != dvp->v_ddid || 318 (cnp->cn_flags & MAKEENTRY) == 0) { 319 dvp->v_ddid = 0; 320 return (0); 321 } 322 *vpp = dvp->v_dd; 323 return (-1); 324 } 325 } 326 327 hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT); 328 hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash); 329 LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) { 330 numchecks++; 331 if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen && 332 !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen)) 333 break; 334 } 335 336 /* We failed to find an entry */ 337 if (ncp == 0) { 338 if ((cnp->cn_flags & MAKEENTRY) == 0) { 339 nummisszap++; 340 } else { 341 nummiss++; 342 } 343 nchstats.ncs_miss++; 344 return (0); 345 } 346 347 /* We don't want to have an entry, so dump it */ 348 if ((cnp->cn_flags & MAKEENTRY) == 0) { 349 numposzaps++; 350 nchstats.ncs_badhits++; 351 cache_zap(ncp); 352 return (0); 353 } 354 355 /* We found a "positive" match, return the vnode */ 356 if (ncp->nc_vp) { 357 numposhits++; 358 nchstats.ncs_goodhits++; 359 *vpp = ncp->nc_vp; 360 return (-1); 361 } 362 363 /* We found a negative match, and want to create it, so purge */ 364 if (cnp->cn_nameiop == CREATE) { 365 numnegzaps++; 366 nchstats.ncs_badhits++; 367 cache_zap(ncp); 368 return (0); 369 } 370 371 numneghits++; 372 /* 373 * We found a "negative" match, so we shift it to the end of 374 * the "negative" cache entries queue to satisfy LRU. Also, 375 * check to see if the entry is a whiteout; indicate this to 376 * the componentname, if so. 377 */ 378 TAILQ_REMOVE(&ncneg, ncp, nc_dst); 379 TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 380 nchstats.ncs_neghits++; 381 if (ncp->nc_flag & NCF_WHITE) 382 cnp->cn_flags |= ISWHITEOUT; 383 return (ENOENT); 384 } 385 386 /* 387 * Add an entry to the cache. 388 */ 389 void 390 cache_enter(dvp, vp, cnp) 391 struct vnode *dvp; 392 struct vnode *vp; 393 struct componentname *cnp; 394 { 395 struct namecache *ncp; 396 struct nchashhead *ncpp; 397 u_int32_t hash; 398 int len; 399 400 if (!doingcache) 401 return; 402 403 if (cnp->cn_nameptr[0] == '.') { 404 if (cnp->cn_namelen == 1) { 405 return; 406 } 407 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 408 if (vp) { 409 dvp->v_dd = vp; 410 dvp->v_ddid = vp->v_id; 411 } else { 412 dvp->v_dd = dvp; 413 dvp->v_ddid = 0; 414 } 415 return; 416 } 417 } 418 419 ncp = (struct namecache *) 420 malloc(sizeof *ncp + cnp->cn_namelen, M_VFSCACHE, M_WAITOK); 421 bzero((char *)ncp, sizeof *ncp); 422 numcache++; 423 if (!vp) { 424 numneg++; 425 ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0; 426 } else if (vp->v_type == VDIR) { 427 vp->v_dd = dvp; 428 vp->v_ddid = dvp->v_id; 429 } 430 431 /* 432 * Set the rest of the namecache entry elements, calculate it's 433 * hash key and insert it into the appropriate chain within 434 * the cache entries table. 435 */ 436 ncp->nc_vp = vp; 437 ncp->nc_dvp = dvp; 438 len = ncp->nc_nlen = cnp->cn_namelen; 439 hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT); 440 bcopy(cnp->cn_nameptr, ncp->nc_name, len); 441 hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash); 442 ncpp = NCHHASH(hash); 443 LIST_INSERT_HEAD(ncpp, ncp, nc_hash); 444 if (LIST_EMPTY(&dvp->v_cache_src)) { 445 vhold(dvp); 446 numcachehv++; 447 } 448 LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src); 449 /* 450 * If the entry is "negative", we place it into the 451 * "negative" cache queue, otherwise, we place it into the 452 * destination vnode's cache entries queue. 453 */ 454 if (vp) { 455 TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst); 456 } else { 457 TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 458 } 459 if (numneg * ncnegfactor > numcache) { 460 ncp = TAILQ_FIRST(&ncneg); 461 cache_zap(ncp); 462 } 463 } 464 465 /* 466 * Name cache initialization, from vfs_init() when we are booting 467 */ 468 static void 469 nchinit(void *dummy __unused) 470 { 471 472 TAILQ_INIT(&ncneg); 473 nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash); 474 } 475 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL) 476 477 478 /* 479 * Invalidate all entries to a particular vnode. 480 * 481 * Remove all entries in the namecache relating to this vnode and 482 * change the v_id. We take the v_id from a global counter, since 483 * it becomes a handy sequence number in crash-dumps that way. 484 * No valid vnode will ever have (v_id == 0). 485 * 486 * XXX: Only time and the size of v_id prevents this from failing: 487 * XXX: In theory we should hunt down all (struct vnode*, v_id) 488 * XXX: soft references and nuke them, at least on the global 489 * XXX: v_id wraparound. The period of resistance can be extended 490 * XXX: by incrementing each vnodes v_id individually instead of 491 * XXX: using the global v_id. 492 */ 493 494 void 495 cache_purge(vp) 496 struct vnode *vp; 497 { 498 static u_long nextid; 499 500 while (!LIST_EMPTY(&vp->v_cache_src)) 501 cache_zap(LIST_FIRST(&vp->v_cache_src)); 502 while (!TAILQ_EMPTY(&vp->v_cache_dst)) 503 cache_zap(TAILQ_FIRST(&vp->v_cache_dst)); 504 505 do 506 nextid++; 507 while (nextid == vp->v_id || !nextid); 508 vp->v_id = nextid; 509 vp->v_dd = vp; 510 vp->v_ddid = 0; 511 } 512 513 /* 514 * Flush all entries referencing a particular filesystem. 515 * 516 * Since we need to check it anyway, we will flush all the invalid 517 * entries at the same time. 518 */ 519 void 520 cache_purgevfs(mp) 521 struct mount *mp; 522 { 523 struct nchashhead *ncpp; 524 struct namecache *ncp, *nnp; 525 526 /* Scan hash tables for applicable entries */ 527 for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) { 528 for (ncp = LIST_FIRST(ncpp); ncp != 0; ncp = nnp) { 529 nnp = LIST_NEXT(ncp, nc_hash); 530 if (ncp->nc_dvp->v_mount == mp) { 531 cache_zap(ncp); 532 } 533 } 534 } 535 } 536 537 /* 538 * Perform canonical checks and cache lookup and pass on to filesystem 539 * through the vop_cachedlookup only if needed. 540 */ 541 542 int 543 vfs_cache_lookup(ap) 544 struct vop_lookup_args /* { 545 struct vnode *a_dvp; 546 struct vnode **a_vpp; 547 struct componentname *a_cnp; 548 } */ *ap; 549 { 550 struct vnode *dvp, *vp; 551 int lockparent; 552 int error; 553 struct vnode **vpp = ap->a_vpp; 554 struct componentname *cnp = ap->a_cnp; 555 struct ucred *cred = cnp->cn_cred; 556 int flags = cnp->cn_flags; 557 struct thread *td = cnp->cn_thread; 558 u_long vpid; /* capability number of vnode */ 559 560 *vpp = NULL; 561 dvp = ap->a_dvp; 562 lockparent = flags & LOCKPARENT; 563 564 if (dvp->v_type != VDIR) 565 return (ENOTDIR); 566 567 if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && 568 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 569 return (EROFS); 570 571 error = VOP_ACCESS(dvp, VEXEC, cred, td); 572 573 if (error) 574 return (error); 575 576 error = cache_lookup(dvp, vpp, cnp); 577 578 #ifdef LOOKUP_SHARED 579 if (!error) { 580 /* We do this because the rest of the system now expects to get 581 * a shared lock, which is later upgraded if LOCKSHARED is not 582 * set. We have so many cases here because of bugs that yield 583 * inconsistant lock states. This all badly needs to be fixed 584 */ 585 error = VOP_CACHEDLOOKUP(dvp, vpp, cnp); 586 if (!error) { 587 int flock; 588 589 flock = VOP_ISLOCKED(*vpp, td); 590 if (flock != LK_EXCLUSIVE) { 591 if (flock == 0) { 592 if ((flags & ISLASTCN) && 593 (flags & LOCKSHARED)) 594 VOP_LOCK(*vpp, LK_SHARED, td); 595 else 596 VOP_LOCK(*vpp, LK_EXCLUSIVE, td); 597 } 598 } else if ((flags & ISLASTCN) && (flags & LOCKSHARED)) 599 VOP_LOCK(*vpp, LK_DOWNGRADE, td); 600 } 601 return (error); 602 } 603 #else 604 if (!error) 605 return (VOP_CACHEDLOOKUP(dvp, vpp, cnp)); 606 #endif 607 608 if (error == ENOENT) 609 return (error); 610 611 vp = *vpp; 612 vpid = vp->v_id; 613 cnp->cn_flags &= ~PDIRUNLOCK; 614 if (dvp == vp) { /* lookup on "." */ 615 VREF(vp); 616 error = 0; 617 } else if (flags & ISDOTDOT) { 618 VOP_UNLOCK(dvp, 0, td); 619 cnp->cn_flags |= PDIRUNLOCK; 620 #ifdef LOOKUP_SHARED 621 if ((flags & ISLASTCN) && (flags & LOCKSHARED)) 622 error = vget(vp, LK_SHARED, td); 623 else 624 error = vget(vp, LK_EXCLUSIVE, td); 625 #else 626 error = vget(vp, LK_EXCLUSIVE, td); 627 #endif 628 629 if (!error && lockparent && (flags & ISLASTCN)) { 630 if ((error = vn_lock(dvp, LK_EXCLUSIVE, td)) == 0) 631 cnp->cn_flags &= ~PDIRUNLOCK; 632 } 633 } else { 634 #ifdef LOOKUP_SHARED 635 if ((flags & ISLASTCN) && (flags & LOCKSHARED)) 636 error = vget(vp, LK_SHARED, td); 637 else 638 error = vget(vp, LK_EXCLUSIVE, td); 639 #else 640 error = vget(vp, LK_EXCLUSIVE, td); 641 #endif 642 if (!lockparent || error || !(flags & ISLASTCN)) { 643 VOP_UNLOCK(dvp, 0, td); 644 cnp->cn_flags |= PDIRUNLOCK; 645 } 646 } 647 /* 648 * Check that the capability number did not change 649 * while we were waiting for the lock. 650 */ 651 if (!error) { 652 if (vpid == vp->v_id) 653 return (0); 654 vput(vp); 655 if (lockparent && dvp != vp && (flags & ISLASTCN)) { 656 VOP_UNLOCK(dvp, 0, td); 657 cnp->cn_flags |= PDIRUNLOCK; 658 } 659 } 660 if (cnp->cn_flags & PDIRUNLOCK) { 661 error = vn_lock(dvp, LK_EXCLUSIVE, td); 662 if (error) 663 return (error); 664 cnp->cn_flags &= ~PDIRUNLOCK; 665 } 666 #ifdef LOOKUP_SHARED 667 error = VOP_CACHEDLOOKUP(dvp, vpp, cnp); 668 669 if (!error) { 670 int flock = 0; 671 672 flock = VOP_ISLOCKED(*vpp, td); 673 if (flock != LK_EXCLUSIVE) { 674 if (flock == 0) { 675 if ((flags & ISLASTCN) && (flags & LOCKSHARED)) 676 VOP_LOCK(*vpp, LK_SHARED, td); 677 else 678 VOP_LOCK(*vpp, LK_EXCLUSIVE, td); 679 } 680 } else if ((flags & ISLASTCN) && (flags & LOCKSHARED)) 681 VOP_LOCK(*vpp, LK_DOWNGRADE, td); 682 } 683 684 return (error); 685 #else 686 return (VOP_CACHEDLOOKUP(dvp, vpp, cnp)); 687 #endif 688 } 689 690 691 #ifndef _SYS_SYSPROTO_H_ 692 struct __getcwd_args { 693 u_char *buf; 694 u_int buflen; 695 }; 696 #endif 697 698 /* 699 * XXX All of these sysctls would probably be more productive dead. 700 */ 701 static int disablecwd; 702 SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, 703 "Disable the getcwd syscall"); 704 705 /* Various statistics for the getcwd syscall */ 706 static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls); 707 static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1); 708 static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2); 709 static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3); 710 static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4); 711 static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound); 712 713 /* Implementation of the getcwd syscall */ 714 int 715 __getcwd(td, uap) 716 struct thread *td; 717 struct __getcwd_args *uap; 718 { 719 720 return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen)); 721 } 722 723 int 724 kern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg, u_int buflen) 725 { 726 char *bp, *tmpbuf; 727 int error, i, slash_prefixed; 728 struct filedesc *fdp; 729 struct namecache *ncp; 730 struct vnode *vp; 731 732 numcwdcalls++; 733 if (disablecwd) 734 return (ENODEV); 735 if (buflen < 2) 736 return (EINVAL); 737 if (buflen > MAXPATHLEN) 738 buflen = MAXPATHLEN; 739 error = 0; 740 tmpbuf = bp = malloc(buflen, M_TEMP, M_WAITOK); 741 bp += buflen - 1; 742 *bp = '\0'; 743 fdp = td->td_proc->p_fd; 744 slash_prefixed = 0; 745 FILEDESC_LOCK(fdp); 746 mp_fixme("No vnode locking done!"); 747 for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) { 748 if (vp->v_vflag & VV_ROOT) { 749 if (vp->v_mount == NULL) { /* forced unmount */ 750 FILEDESC_UNLOCK(fdp); 751 free(tmpbuf, M_TEMP); 752 return (EBADF); 753 } 754 vp = vp->v_mount->mnt_vnodecovered; 755 continue; 756 } 757 if (vp->v_dd->v_id != vp->v_ddid) { 758 FILEDESC_UNLOCK(fdp); 759 numcwdfail1++; 760 free(tmpbuf, M_TEMP); 761 return (ENOTDIR); 762 } 763 ncp = TAILQ_FIRST(&vp->v_cache_dst); 764 if (!ncp) { 765 FILEDESC_UNLOCK(fdp); 766 numcwdfail2++; 767 free(tmpbuf, M_TEMP); 768 return (ENOENT); 769 } 770 if (ncp->nc_dvp != vp->v_dd) { 771 FILEDESC_UNLOCK(fdp); 772 numcwdfail3++; 773 free(tmpbuf, M_TEMP); 774 return (EBADF); 775 } 776 for (i = ncp->nc_nlen - 1; i >= 0; i--) { 777 if (bp == tmpbuf) { 778 FILEDESC_UNLOCK(fdp); 779 numcwdfail4++; 780 free(tmpbuf, M_TEMP); 781 return (ENOMEM); 782 } 783 *--bp = ncp->nc_name[i]; 784 } 785 if (bp == tmpbuf) { 786 FILEDESC_UNLOCK(fdp); 787 numcwdfail4++; 788 free(tmpbuf, M_TEMP); 789 return (ENOMEM); 790 } 791 *--bp = '/'; 792 slash_prefixed = 1; 793 vp = vp->v_dd; 794 } 795 FILEDESC_UNLOCK(fdp); 796 if (!slash_prefixed) { 797 if (bp == tmpbuf) { 798 numcwdfail4++; 799 free(tmpbuf, M_TEMP); 800 return (ENOMEM); 801 } 802 *--bp = '/'; 803 } 804 numcwdfound++; 805 if (bufseg == UIO_SYSSPACE) 806 bcopy(bp, buf, strlen(bp) + 1); 807 else 808 error = copyout(bp, buf, strlen(bp) + 1); 809 free(tmpbuf, M_TEMP); 810 return (error); 811 } 812 813 /* 814 * Thus begins the fullpath magic. 815 */ 816 817 #undef STATNODE 818 #define STATNODE(name) \ 819 static u_int name; \ 820 SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "") 821 822 static int disablefullpath; 823 SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0, 824 "Disable the vn_fullpath function"); 825 826 STATNODE(numfullpathcalls); 827 STATNODE(numfullpathfail1); 828 STATNODE(numfullpathfail2); 829 STATNODE(numfullpathfail3); 830 STATNODE(numfullpathfail4); 831 STATNODE(numfullpathfound); 832 833 /* 834 * Retrieve the full filesystem path that correspond to a vnode from the name 835 * cache (if available) 836 */ 837 int 838 vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) 839 { 840 char *bp, *buf; 841 int i, slash_prefixed; 842 struct filedesc *fdp; 843 struct namecache *ncp; 844 struct vnode *vp; 845 846 numfullpathcalls++; 847 if (disablefullpath) 848 return (ENODEV); 849 if (vn == NULL) 850 return (EINVAL); 851 buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 852 bp = buf + MAXPATHLEN - 1; 853 *bp = '\0'; 854 fdp = td->td_proc->p_fd; 855 slash_prefixed = 0; 856 FILEDESC_LOCK(fdp); 857 for (vp = vn; vp != fdp->fd_rdir && vp != rootvnode;) { 858 ASSERT_VOP_LOCKED(vp, "vn_fullpath"); 859 if (vp->v_vflag & VV_ROOT) { 860 if (vp->v_mount == NULL) { /* forced unmount */ 861 FILEDESC_UNLOCK(fdp); 862 free(buf, M_TEMP); 863 return (EBADF); 864 } 865 vp = vp->v_mount->mnt_vnodecovered; 866 continue; 867 } 868 if (vp != vn && vp->v_dd->v_id != vp->v_ddid) { 869 FILEDESC_UNLOCK(fdp); 870 numfullpathfail1++; 871 free(buf, M_TEMP); 872 return (ENOTDIR); 873 } 874 ncp = TAILQ_FIRST(&vp->v_cache_dst); 875 if (!ncp) { 876 FILEDESC_UNLOCK(fdp); 877 numfullpathfail2++; 878 free(buf, M_TEMP); 879 return (ENOENT); 880 } 881 if (vp != vn && ncp->nc_dvp != vp->v_dd) { 882 FILEDESC_UNLOCK(fdp); 883 numfullpathfail3++; 884 free(buf, M_TEMP); 885 return (EBADF); 886 } 887 for (i = ncp->nc_nlen - 1; i >= 0; i--) { 888 if (bp == buf) { 889 FILEDESC_UNLOCK(fdp); 890 numfullpathfail4++; 891 free(buf, M_TEMP); 892 return (ENOMEM); 893 } 894 *--bp = ncp->nc_name[i]; 895 } 896 if (bp == buf) { 897 FILEDESC_UNLOCK(fdp); 898 numfullpathfail4++; 899 free(buf, M_TEMP); 900 return (ENOMEM); 901 } 902 *--bp = '/'; 903 slash_prefixed = 1; 904 vp = ncp->nc_dvp; 905 } 906 if (!slash_prefixed) { 907 if (bp == buf) { 908 FILEDESC_UNLOCK(fdp); 909 numfullpathfail4++; 910 free(buf, M_TEMP); 911 return (ENOMEM); 912 } 913 *--bp = '/'; 914 } 915 FILEDESC_UNLOCK(fdp); 916 numfullpathfound++; 917 *retbuf = bp; 918 *freebuf = buf; 919 return (0); 920 } 921