/*
 * Copyright (c) 1989, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Poul-Henning Kamp of the FreeBSD Project.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_cache.c	8.5 (Berkeley) 3/22/95
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/sysctl.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/sysproto.h>
#include <sys/proc.h>
#include <sys/filedesc.h>
#include <sys/fnv_hash.h>

/*
 * This structure describes the elements in the cache of recent
 * names looked up by namei.
 */

struct namecache {
	LIST_ENTRY(namecache) nc_hash;	/* hash chain */
	LIST_ENTRY(namecache) nc_src;	/* source vnode list */
	TAILQ_ENTRY(namecache) nc_dst;	/* destination vnode list */
	struct vnode *nc_dvp;		/* vnode of parent of name */
	struct vnode *nc_vp;		/* vnode the name refers to */
	u_char nc_flag;			/* flag bits */
	u_char nc_nlen;			/* length of name */
	char nc_name[0];		/* segment name */
};

/*
 * Name caching works as follows:
 *
 * Names found by directory scans are retained in a cache
 * for future reference.  It is managed LRU, so frequently
 * used names will hang around.  Cache is indexed by hash value
 * obtained from (vp, name) where vp refers to the directory
 * containing name.
 *
 * If it is a "negative" entry, (i.e. for a name that is known NOT to
 * exist) the vnode pointer will be NULL.
 *
 * Upon reaching the last segment of a path, if the reference
 * is for DELETE, or NOCACHE is set (rewrite), and the
 * name is located in the cache, it will be dropped.
 */
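
/*
 * Illustrative sketch (not compiled): the name bytes live inline after
 * the namecache header via the zero-length nc_name[] array, so a single
 * allocation holds both header and name.  The lines below just restate
 * what cache_enter() does further down, for clarity:
 *
 *	struct namecache *ncp;
 *
 *	ncp = malloc(sizeof(*ncp) + cnp->cn_namelen, M_VFSCACHE, M_WAITOK);
 *	ncp->nc_nlen = cnp->cn_namelen;
 *	bcopy(cnp->cn_nameptr, ncp->nc_name, ncp->nc_nlen);
 *
 * Note that nc_name is not nul-terminated; comparisons must use
 * nc_nlen with bcmp(), as cache_lookup() does.
 */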

/*
 * Structures associated with name caching.
 */
#define NCHHASH(hash) \
	(&nchashtbl[(hash) & nchash])
static LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
static TAILQ_HEAD(, namecache) ncneg;	/* LRU list of negative entries */
static u_long	nchash;			/* size of hash table - 1 (mask) */
SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");
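
/*
 * Bucket selection, worked through (illustrative only): hashinit()
 * picks a power-of-two table size and stores size - 1 in nchash, so
 * NCHHASH() can mask instead of taking a modulus.  For example, with
 * 256 buckets nchash == 0xff, and a hash of 0x9e3779b9 lands in
 * bucket 0x9e3779b9 & 0xff == 0xb9 (185).
 */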
static u_long	ncnegfactor = 16;	/* ratio of negative entries */
SYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");
static u_long	numneg;			/* number of negative entries */
SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");
static u_long	numcache;		/* number of cache entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");
static u_long	numcachehv;		/* number of cache entries with vnodes held */
SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, "");
static u_long	numcachepl;		/* number of cache purges for leaf entries */
SYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, "");
struct	nchstats nchstats;		/* cache effectiveness statistics */

static int	doingcache = 1;		/* 1 => enable the cache */
SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "");
SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), "");
SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), "");

/*
 * The new name cache statistics
 */
SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics");
#define STATNODE(mode, name, var) \
	SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
STATNODE(CTLFLAG_RD, numneg, &numneg);
STATNODE(CTLFLAG_RD, numcache, &numcache);
static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls);
static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits);
static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits);
static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks);
static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss);
static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap);
static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps);
static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits);
static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps);
static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);

SYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD, &nchstats,
	sizeof(nchstats), "LU", "VFS cache effectiveness statistics");


static void cache_zap __P((struct namecache *ncp));

static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");

/*
 * Flags in namecache.nc_flag
 */
#define NCF_WHITE	1

/*
 * Grab an atomic snapshot of the name cache hash chain lengths
 */
SYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats");

static int
sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
{
	int error;
	struct nchashhead *ncpp;
	struct namecache *ncp;
	int n_nchash;
	int count;

	n_nchash = nchash + 1;	/* nchash is max index, not count */
	if (!req->oldptr)
		return SYSCTL_OUT(req, 0, n_nchash * sizeof(int));

	/* Scan hash tables for applicable entries */
	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
		count = 0;
		LIST_FOREACH(ncp, ncpp, nc_hash) {
			count++;
		}
		error = SYSCTL_OUT(req, (caddr_t)&count, sizeof(count));
		if (error)
			return (error);
	}
	return (0);
}
SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_debug_hashstat_rawnchash, "S,int", "nchash chain lengths");

static int
sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
{
	int error;
	struct nchashhead *ncpp;
	struct namecache *ncp;
	int n_nchash;
	int count, maxlength, used, pct;

	if (!req->oldptr)
		return SYSCTL_OUT(req, 0, 4 * sizeof(int));

	n_nchash = nchash + 1;	/* nchash is max index, not count */
	used = 0;
	maxlength = 0;

	/* Scan hash tables for applicable entries */
	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
		count = 0;
		LIST_FOREACH(ncp, ncpp, nc_hash) {
			count++;
		}
		if (count)
			used++;
		if (maxlength < count)
			maxlength = count;
	}
	n_nchash = nchash + 1;
	pct = (used * 100 * 100) / n_nchash;	/* utilization, hundredths of a percent */
	error = SYSCTL_OUT(req, (caddr_t)&n_nchash, sizeof(n_nchash));
	if (error)
		return (error);
	error = SYSCTL_OUT(req, (caddr_t)&used, sizeof(used));
	if (error)
		return (error);
	error = SYSCTL_OUT(req, (caddr_t)&maxlength, sizeof(maxlength));
	if (error)
		return (error);
	error = SYSCTL_OUT(req, (caddr_t)&pct, sizeof(pct));
	if (error)
		return (error);
	return (0);
}
SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_debug_hashstat_nchash, "I", "nchash chain lengths");

/*
 * Delete an entry from its hash chain and its source/destination
 * lists, then free it.  (Entries are not recycled; cache_enter()
 * always allocates a fresh one.)
 */
static void
cache_zap(ncp)
	struct namecache *ncp;
{
	LIST_REMOVE(ncp, nc_hash);
	LIST_REMOVE(ncp, nc_src);
	if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
		vdrop(ncp->nc_dvp);
		numcachehv--;
	}
	if (ncp->nc_vp) {
		TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
	} else {
		TAILQ_REMOVE(&ncneg, ncp, nc_dst);
		numneg--;
	}
	numcache--;
	free(ncp, M_VFSCACHE);
}

/*
 * Lookup an entry in the cache
 *
 * We don't do this if the segment name is long, simply so the cache
 * can avoid holding long names (which would either waste space, or
 * add greatly to the complexity).
 *
 * Lookup is called with dvp pointing to the directory to search,
 * cnp pointing to the name of the entry being sought.  If the lookup
 * succeeds, the vnode is returned in *vpp, and a status of -1 is
 * returned.  If the lookup determines that the name does not exist
 * (negative caching), a status of ENOENT is returned.  If the lookup
 * fails, a status of zero is returned.
 */

int
cache_lookup(dvp, vpp, cnp)
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
{
	struct namecache *ncp;
	u_int32_t hash;

	if (!doingcache) {
		cnp->cn_flags &= ~MAKEENTRY;
		return (0);
	}

	numcalls++;

	if (cnp->cn_nameptr[0] == '.') {
		if (cnp->cn_namelen == 1) {
			*vpp = dvp;
			dothits++;
			return (-1);
		}
		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
			dotdothits++;
			if (dvp->v_dd->v_id != dvp->v_ddid ||
			    (cnp->cn_flags & MAKEENTRY) == 0) {
				dvp->v_ddid = 0;
				return (0);
			}
			*vpp = dvp->v_dd;
			return (-1);
		}
	}

	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
		numchecks++;
		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
			break;
	}

	/* We failed to find an entry */
	if (ncp == 0) {
		if ((cnp->cn_flags & MAKEENTRY) == 0) {
			nummisszap++;
		} else {
			nummiss++;
		}
		nchstats.ncs_miss++;
		return (0);
	}

	/* We don't want to have an entry, so dump it */
	if ((cnp->cn_flags & MAKEENTRY) == 0) {
		numposzaps++;
		nchstats.ncs_badhits++;
		cache_zap(ncp);
		return (0);
	}

	/* We found a "positive" match, return the vnode */
	if (ncp->nc_vp) {
		numposhits++;
		nchstats.ncs_goodhits++;
		*vpp = ncp->nc_vp;
		return (-1);
	}

	/* We found a negative match, and want to create it, so purge */
	if (cnp->cn_nameiop == CREATE) {
		numnegzaps++;
		nchstats.ncs_badhits++;
		cache_zap(ncp);
		return (0);
	}

	numneghits++;
	/*
	 * We found a "negative" match, ENOENT notifies client of this match.
	 * The nc_flag field (NCF_WHITE) records whether this is a whiteout.
	 */
	TAILQ_REMOVE(&ncneg, ncp, nc_dst);
	TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
	nchstats.ncs_neghits++;
	if (ncp->nc_flag & NCF_WHITE)
		cnp->cn_flags |= ISWHITEOUT;
	return (ENOENT);
}
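
/*
 * Illustrative caller sketch (not compiled): how a lookup routine is
 * expected to interpret the three cache_lookup() results.  The real,
 * fully locked version of this is vfs_cache_lookup() below.
 *
 *	error = cache_lookup(dvp, vpp, cnp);
 *	if (error == 0)		// miss: fall back to a directory scan
 *		return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
 *	if (error == ENOENT)	// negative hit: name known not to exist
 *		return (ENOENT);
 *	// error == -1: positive hit, *vpp holds the (unlocked) vnode
 */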

/*
 * Add an entry to the cache.
 */
void
cache_enter(dvp, vp, cnp)
	struct vnode *dvp;
	struct vnode *vp;
	struct componentname *cnp;
{
	struct namecache *ncp;
	struct nchashhead *ncpp;
	u_int32_t hash;
	int len;

	if (!doingcache)
		return;

	if (cnp->cn_nameptr[0] == '.') {
		if (cnp->cn_namelen == 1) {
			return;
		}
		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
			if (vp) {
				dvp->v_dd = vp;
				dvp->v_ddid = vp->v_id;
			} else {
				dvp->v_dd = dvp;
				dvp->v_ddid = 0;
			}
			return;
		}
	}

	ncp = (struct namecache *)
		malloc(sizeof *ncp + cnp->cn_namelen, M_VFSCACHE, M_WAITOK);
	bzero((char *)ncp, sizeof *ncp);
	numcache++;
	if (!vp) {
		numneg++;
		ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0;
	} else if (vp->v_type == VDIR) {
		vp->v_dd = dvp;
		vp->v_ddid = dvp->v_id;
	}

	/*
	 * Fill in cache info, if vp is NULL this is a "negative" cache entry.
	 * For negative entries, we have to record whether it is a whiteout;
	 * the whiteout flag is stored in nc_flag (NCF_WHITE), set above.
	 */
	ncp->nc_vp = vp;
	ncp->nc_dvp = dvp;
	len = ncp->nc_nlen = cnp->cn_namelen;
	hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
	bcopy(cnp->cn_nameptr, ncp->nc_name, len);
	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
	ncpp = NCHHASH(hash);
	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
	if (LIST_EMPTY(&dvp->v_cache_src)) {
		vhold(dvp);
		numcachehv++;
	}
	LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
	if (vp) {
		TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
	} else {
		TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
	}
	/*
	 * Hold negative entries to at most 1/ncnegfactor of the cache by
	 * evicting the oldest one (the head of the ncneg LRU list).
	 */
	if (numneg * ncnegfactor > numcache) {
		ncp = TAILQ_FIRST(&ncneg);
		cache_zap(ncp);
	}
}
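
/*
 * Illustrative sketch (not compiled): a filesystem's directory-scan
 * lookup typically ends by feeding its result back into the cache,
 * provided the caller asked for that with MAKEENTRY.  Passing a NULL
 * vnode records a "name does not exist" (negative) entry.
 *
 *	if (cnp->cn_flags & MAKEENTRY) {
 *		if (found)
 *			cache_enter(dvp, vp, cnp);	// positive entry
 *		else
 *			cache_enter(dvp, NULL, cnp);	// negative entry
 *	}
 */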

/*
 * Name cache initialization, from vfs_init() when we are booting
 */
static void
nchinit(void *dummy __unused)
{

	TAILQ_INIT(&ncneg);
	nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
}
SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL)


/*
 * Invalidate all entries to a particular vnode.
 *
 * Remove all entries in the namecache relating to this vnode and
 * change the v_id.  We take the v_id from a global counter, since
 * it becomes a handy sequence number in crash-dumps that way.
 * No valid vnode will ever have (v_id == 0).
 *
 * XXX: Only time and the size of v_id prevents this from failing:
 * XXX: In theory we should hunt down all (struct vnode*, v_id)
 * XXX: soft references and nuke them, at least on the global
 * XXX: v_id wraparound.  The period of resistance can be extended
 * XXX: by incrementing each vnode's v_id individually instead of
 * XXX: using the global v_id.
 */

void
cache_purge(vp)
	struct vnode *vp;
{
	static u_long nextid;

	while (!LIST_EMPTY(&vp->v_cache_src))
		cache_zap(LIST_FIRST(&vp->v_cache_src));
	while (!TAILQ_EMPTY(&vp->v_cache_dst))
		cache_zap(TAILQ_FIRST(&vp->v_cache_dst));

	do
		nextid++;
	while (nextid == vp->v_id || !nextid);
	vp->v_id = nextid;
	vp->v_dd = vp;
	vp->v_ddid = 0;
}
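
/*
 * Illustrative sketch (not compiled) of the v_id "capability" scheme
 * described above: a soft reference is a (vnode pointer, v_id) pair,
 * revalidated by comparison after any operation that may sleep.
 * vfs_cache_lookup() below uses exactly this pattern.
 *
 *	u_long vpid;
 *
 *	vpid = vp->v_id;			// snapshot the capability
 *	error = vget(vp, LK_EXCLUSIVE, p);	// may sleep
 *	if (!error && vpid != vp->v_id) {
 *		// cache_purge() ran meanwhile; the name we resolved
 *		// may no longer belong to this vnode, so retry.
 *	}
 */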

/*
 * Flush all entries referencing a particular filesystem.
 *
 * Since we need to check it anyway, we will flush all the invalid
 * entries at the same time.
 */
void
cache_purgevfs(mp)
	struct mount *mp;
{
	struct nchashhead *ncpp;
	struct namecache *ncp, *nnp;

	/* Scan hash tables for applicable entries */
	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
		for (ncp = LIST_FIRST(ncpp); ncp != 0; ncp = nnp) {
			nnp = LIST_NEXT(ncp, nc_hash);
			if (ncp->nc_dvp->v_mount == mp) {
				cache_zap(ncp);
			}
		}
	}
}

/*
 * Flush all directory entries with no child directories held in
 * the cache.
 *
 * Since we need to check it anyway, we will flush all the invalid
 * entries at the same time.
 */
void
cache_purgeleafdirs(ndir)
	int ndir;
{
	struct nchashhead *ncpp;
	struct namecache *ncp, *nnp, *ncpc, *nnpc;
	struct vnode *dvp;

	/* Scan hash tables for applicable entries */
	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl && ndir > 0; ncpp--) {
		for (ncp = LIST_FIRST(ncpp); ncp != 0 && ndir > 0; ncp = nnp) {
			nnp = LIST_NEXT(ncp, nc_hash);
			if (ncp->nc_dvp != 0) {
				/*
				 * Skip over if nc_dvp of this cache holds
				 * a child directory, or the hold count of
				 * nc_dvp is greater than 1 (in which case
				 * nc_dvp is likely to be the working
				 * directory of a process).
				 */
				if (ncp->nc_dvp->v_holdcnt > 1)
					continue;
				for (ncpc = LIST_FIRST(&ncp->nc_dvp->v_cache_src);
				     ncpc != 0; ncpc = nnpc) {
					nnpc = LIST_NEXT(ncpc, nc_src);
					if (ncpc->nc_vp != 0 && ncpc->nc_vp->v_type == VDIR)
						break;
				}
				if (ncpc == 0) {
					/*
					 * Zap all of this directory's children,
					 * held in ncp->nc_dvp->v_cache_src.
					 */
					dvp = ncp->nc_dvp;
					while (!LIST_EMPTY(&dvp->v_cache_src))
						cache_zap(LIST_FIRST(&dvp->v_cache_src));

					ndir--;

					/* Restart in case where nnp is reclaimed. */
					nnp = LIST_FIRST(ncpp);
					continue;
				}
			}
		}
	}
	numcachepl++;
}

/*
 * Perform canonical checks and cache lookup and pass on to filesystem
 * through the vop_cachedlookup only if needed.
 */

int
vfs_cache_lookup(ap)
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{
	struct vnode *dvp, *vp;
	int lockparent;
	int error;
	struct vnode **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	struct ucred *cred = cnp->cn_cred;
	int flags = cnp->cn_flags;
	struct proc *p = cnp->cn_proc;
	u_long vpid;	/* capability number of vnode */

	*vpp = NULL;
	dvp = ap->a_dvp;
	lockparent = flags & LOCKPARENT;

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);

	error = VOP_ACCESS(dvp, VEXEC, cred, p);

	if (error)
		return (error);

	error = cache_lookup(dvp, vpp, cnp);

	if (!error)
		return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));

	if (error == ENOENT)
		return (error);

	vp = *vpp;
	vpid = vp->v_id;
	cnp->cn_flags &= ~PDIRUNLOCK;
	if (dvp == vp) {	/* lookup on "." */
		VREF(vp);
		error = 0;
	} else if (flags & ISDOTDOT) {
		VOP_UNLOCK(dvp, 0, p);
		cnp->cn_flags |= PDIRUNLOCK;
		error = vget(vp, LK_EXCLUSIVE, p);
		if (!error && lockparent && (flags & ISLASTCN)) {
			if ((error = vn_lock(dvp, LK_EXCLUSIVE, p)) == 0)
				cnp->cn_flags &= ~PDIRUNLOCK;
		}
	} else {
		error = vget(vp, LK_EXCLUSIVE, p);
		if (!lockparent || error || !(flags & ISLASTCN)) {
			VOP_UNLOCK(dvp, 0, p);
			cnp->cn_flags |= PDIRUNLOCK;
		}
	}
	/*
	 * Check that the capability number did not change
	 * while we were waiting for the lock.
	 */
	if (!error) {
		if (vpid == vp->v_id)
			return (0);
		vput(vp);
		if (lockparent && dvp != vp && (flags & ISLASTCN)) {
			VOP_UNLOCK(dvp, 0, p);
			cnp->cn_flags |= PDIRUNLOCK;
		}
	}
	if (cnp->cn_flags & PDIRUNLOCK) {
		error = vn_lock(dvp, LK_EXCLUSIVE, p);
		if (error)
			return (error);
		cnp->cn_flags &= ~PDIRUNLOCK;
	}
	return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
}

#ifndef _SYS_SYSPROTO_H_
struct __getcwd_args {
	u_char	*buf;
	u_int	buflen;
};
#endif

static int disablecwd;
SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, "");

static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls);
static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1);
static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2);
static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3);
static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4);
static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound);
int
__getcwd(p, uap)
	struct proc *p;
	struct __getcwd_args *uap;
{
	char *bp, *buf;
	int error, i, slash_prefixed;
	struct filedesc *fdp;
	struct namecache *ncp;
	struct vnode *vp;

	numcwdcalls++;
	if (disablecwd)
		return (ENODEV);
	if (uap->buflen < 2)
		return (EINVAL);
	if (uap->buflen > MAXPATHLEN)
		uap->buflen = MAXPATHLEN;
	buf = bp = malloc(uap->buflen, M_TEMP, M_WAITOK);
	bp += uap->buflen - 1;
	*bp = '\0';
	fdp = p->p_fd;
	slash_prefixed = 0;
	for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) {
		if (vp->v_flag & VROOT) {
			if (vp->v_mount == NULL) {	/* forced unmount */
				free(buf, M_TEMP);
				return (EBADF);
			}
			vp = vp->v_mount->mnt_vnodecovered;
			continue;
		}
		if (vp->v_dd->v_id != vp->v_ddid) {
			numcwdfail1++;
			free(buf, M_TEMP);
			return (ENOTDIR);
		}
		ncp = TAILQ_FIRST(&vp->v_cache_dst);
		if (!ncp) {
			numcwdfail2++;
			free(buf, M_TEMP);
			return (ENOENT);
		}
		if (ncp->nc_dvp != vp->v_dd) {
			numcwdfail3++;
			free(buf, M_TEMP);
			return (EBADF);
		}
		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
			if (bp == buf) {
				numcwdfail4++;
				free(buf, M_TEMP);
				return (ENOMEM);
			}
			*--bp = ncp->nc_name[i];
		}
		if (bp == buf) {
			numcwdfail4++;
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
		slash_prefixed = 1;
		vp = vp->v_dd;
	}
	if (!slash_prefixed) {
		if (bp == buf) {
			numcwdfail4++;
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
	}
	numcwdfound++;
	error = copyout(bp, uap->buf, strlen(bp) + 1);
	free(buf, M_TEMP);
	return (error);
}
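
/*
 * Worked example of the backward fill above (illustrative): for a cwd
 * of /usr/src the walk visits "src" and then "usr", and each name is
 * copied in right to left, so the buffer evolves as
 *
 *	[.............\0]	initially, bp at the terminating nul
 *	[..........src\0]
 *	[........./src\0]
 *	[......usr/src\0]
 *	[...../usr/src\0]	bp now points at the leading '/'
 *
 * which is why the result is copied out starting at bp, not buf.
 */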

/*
 * Thus begins the fullpath magic.
 */

#undef STATNODE
#define STATNODE(name)							\
	static u_int name;						\
	SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "")

static int disablefullpath;
SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW,
    &disablefullpath, 0, "");

STATNODE(numfullpathcalls);
STATNODE(numfullpathfail1);
STATNODE(numfullpathfail2);
STATNODE(numfullpathfail3);
STATNODE(numfullpathfail4);
STATNODE(numfullpathfound);

int
textvp_fullpath(struct proc *p, char **retbuf, char **retfreebuf)
{
	char *bp, *buf;
	int i, slash_prefixed;
	struct filedesc *fdp;
	struct namecache *ncp;
	struct vnode *vp, *textvp;

	numfullpathcalls++;
	if (disablefullpath)
		return (ENODEV);
	textvp = p->p_textvp;
	if (textvp == NULL)
		return (EINVAL);
	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
	bp = buf + MAXPATHLEN - 1;
	*bp = '\0';
	fdp = p->p_fd;
	slash_prefixed = 0;
	for (vp = textvp; vp != fdp->fd_rdir && vp != rootvnode;) {
		if (vp->v_flag & VROOT) {
			if (vp->v_mount == NULL) {	/* forced unmount */
				free(buf, M_TEMP);
				return (EBADF);
			}
			vp = vp->v_mount->mnt_vnodecovered;
			continue;
		}
		if (vp != textvp && vp->v_dd->v_id != vp->v_ddid) {
			numfullpathfail1++;
			free(buf, M_TEMP);
			return (ENOTDIR);
		}
		ncp = TAILQ_FIRST(&vp->v_cache_dst);
		if (!ncp) {
			numfullpathfail2++;
			free(buf, M_TEMP);
			return (ENOENT);
		}
		if (vp != textvp && ncp->nc_dvp != vp->v_dd) {
			numfullpathfail3++;
			free(buf, M_TEMP);
			return (EBADF);
		}
		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
			if (bp == buf) {
				numfullpathfail4++;
				free(buf, M_TEMP);
				return (ENOMEM);
			}
			*--bp = ncp->nc_name[i];
		}
		if (bp == buf) {
			numfullpathfail4++;
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
		slash_prefixed = 1;
		vp = ncp->nc_dvp;
	}
	if (!slash_prefixed) {
		if (bp == buf) {
			numfullpathfail4++;
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
	}
	numfullpathfound++;
	*retbuf = bp;
	*retfreebuf = buf;
	return (0);
}
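
/*
 * Illustrative caller sketch (not compiled): *retbuf points into the
 * middle of the MAXPATHLEN buffer returned through *retfreebuf, so the
 * caller uses retbuf but must free retfreebuf.
 *
 *	char *fullpath, *freepath;
 *
 *	if (textvp_fullpath(p, &fullpath, &freepath) == 0) {
 *		// ... use fullpath ...
 *		free(freepath, M_TEMP);
 *	}
 */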