1 /*- 2 * Copyright (c) 1989, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Poul-Henning Kamp of the FreeBSD Project. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include "opt_ktrace.h" 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/filedesc.h> 43 #include <sys/fnv_hash.h> 44 #include <sys/kernel.h> 45 #include <sys/lock.h> 46 #include <sys/malloc.h> 47 #include <sys/fcntl.h> 48 #include <sys/mount.h> 49 #include <sys/namei.h> 50 #include <sys/proc.h> 51 #include <sys/rwlock.h> 52 #include <sys/sdt.h> 53 #include <sys/syscallsubr.h> 54 #include <sys/sysctl.h> 55 #include <sys/sysproto.h> 56 #include <sys/vnode.h> 57 #ifdef KTRACE 58 #include <sys/ktrace.h> 59 #endif 60 61 #include <vm/uma.h> 62 63 SDT_PROVIDER_DECLARE(vfs); 64 SDT_PROBE_DEFINE3(vfs, namecache, enter, done, "struct vnode *", "char *", 65 "struct vnode *"); 66 SDT_PROBE_DEFINE2(vfs, namecache, enter_negative, done, "struct vnode *", 67 "char *"); 68 SDT_PROBE_DEFINE1(vfs, namecache, fullpath, entry, "struct vnode *"); 69 SDT_PROBE_DEFINE3(vfs, namecache, fullpath, hit, "struct vnode *", 70 "char *", "struct vnode *"); 71 SDT_PROBE_DEFINE1(vfs, namecache, fullpath, miss, "struct vnode *"); 72 SDT_PROBE_DEFINE3(vfs, namecache, fullpath, return, "int", 73 "struct vnode *", "char *"); 74 SDT_PROBE_DEFINE3(vfs, namecache, lookup, hit, "struct vnode *", "char *", 75 "struct vnode *"); 76 SDT_PROBE_DEFINE2(vfs, namecache, lookup, hit__negative, 77 "struct vnode *", "char *"); 78 SDT_PROBE_DEFINE2(vfs, namecache, lookup, miss, "struct vnode *", 79 "char *"); 80 SDT_PROBE_DEFINE1(vfs, namecache, purge, done, "struct vnode *"); 81 SDT_PROBE_DEFINE1(vfs, namecache, purge_negative, done, "struct vnode *"); 82 SDT_PROBE_DEFINE1(vfs, namecache, purgevfs, done, "struct mount *"); 83 SDT_PROBE_DEFINE3(vfs, namecache, zap, done, "struct vnode *", "char *", 84 "struct vnode *"); 85 SDT_PROBE_DEFINE2(vfs, namecache, zap_negative, done, "struct vnode *", 86 "char *"); 87 88 /* 89 * This structure describes the elements in the cache of recent 90 * names looked up by namei. 91 */ 92 93 struct namecache { 94 LIST_ENTRY(namecache) nc_hash; /* hash chain */ 95 LIST_ENTRY(namecache) nc_src; /* source vnode list */ 96 TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */ 97 struct vnode *nc_dvp; /* vnode of parent of name */ 98 struct vnode *nc_vp; /* vnode the name refers to */ 99 u_char nc_flag; /* flag bits */ 100 u_char nc_nlen; /* length of name */ 101 char nc_name[0]; /* segment name + nul */ 102 }; 103 104 /* 105 * struct namecache_ts repeats struct namecache layout up to the 106 * nc_nlen member. 107 * struct namecache_ts is used in place of struct namecache when time(s) need 108 * to be stored. The nc_dotdottime field is used when a cache entry is mapping 109 * both a non-dotdot directory name plus dotdot for the directory's 110 * parent. 111 */ 112 struct namecache_ts { 113 LIST_ENTRY(namecache) nc_hash; /* hash chain */ 114 LIST_ENTRY(namecache) nc_src; /* source vnode list */ 115 TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */ 116 struct vnode *nc_dvp; /* vnode of parent of name */ 117 struct vnode *nc_vp; /* vnode the name refers to */ 118 u_char nc_flag; /* flag bits */ 119 u_char nc_nlen; /* length of name */ 120 struct timespec nc_time; /* timespec provided by fs */ 121 struct timespec nc_dotdottime; /* dotdot timespec provided by fs */ 122 int nc_ticks; /* ticks value when entry was added */ 123 char nc_name[0]; /* segment name + nul */ 124 }; 125 126 /* 127 * Flags in namecache.nc_flag 128 */ 129 #define NCF_WHITE 0x01 130 #define NCF_ISDOTDOT 0x02 131 #define NCF_TS 0x04 132 #define NCF_DTS 0x08 133 134 /* 135 * Name caching works as follows: 136 * 137 * Names found by directory scans are retained in a cache 138 * for future reference. It is managed LRU, so frequently 139 * used names will hang around. Cache is indexed by hash value 140 * obtained from (vp, name) where vp refers to the directory 141 * containing name. 142 * 143 * If it is a "negative" entry, (i.e. for a name that is known NOT to 144 * exist) the vnode pointer will be NULL. 145 * 146 * Upon reaching the last segment of a path, if the reference 147 * is for DELETE, or NOCACHE is set (rewrite), and the 148 * name is located in the cache, it will be dropped. 149 */ 150 151 /* 152 * Structures associated with name cacheing. 153 */ 154 #define NCHHASH(hash) \ 155 (&nchashtbl[(hash) & nchash]) 156 static LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */ 157 static TAILQ_HEAD(, namecache) ncneg; /* Hash Table */ 158 static u_long nchash; /* size of hash table */ 159 SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, 160 "Size of namecache hash table"); 161 static u_long ncnegfactor = 16; /* ratio of negative entries */ 162 SYSCTL_ULONG(_vfs, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, 163 "Ratio of negative namecache entries"); 164 static u_long numneg; /* number of negative entries allocated */ 165 SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, 166 "Number of negative entries in namecache"); 167 static u_long numcache; /* number of cache entries allocated */ 168 SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, 169 "Number of namecache entries"); 170 static u_long numcachehv; /* number of cache entries with vnodes held */ 171 SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, 172 "Number of namecache entries with vnodes held"); 173 static u_int ncsizefactor = 2; 174 SYSCTL_UINT(_vfs, OID_AUTO, ncsizefactor, CTLFLAG_RW, &ncsizefactor, 0, 175 "Size factor for namecache"); 176 177 struct nchstats nchstats; /* cache effectiveness statistics */ 178 179 static struct rwlock cache_lock; 180 RW_SYSINIT(vfscache, &cache_lock, "Name Cache"); 181 182 #define CACHE_UPGRADE_LOCK() rw_try_upgrade(&cache_lock) 183 #define CACHE_RLOCK() rw_rlock(&cache_lock) 184 #define CACHE_RUNLOCK() rw_runlock(&cache_lock) 185 #define CACHE_WLOCK() rw_wlock(&cache_lock) 186 #define CACHE_WUNLOCK() rw_wunlock(&cache_lock) 187 188 /* 189 * UMA zones for the VFS cache. 190 * 191 * The small cache is used for entries with short names, which are the 192 * most common. The large cache is used for entries which are too big to 193 * fit in the small cache. 194 */ 195 static uma_zone_t cache_zone_small; 196 static uma_zone_t cache_zone_small_ts; 197 static uma_zone_t cache_zone_large; 198 static uma_zone_t cache_zone_large_ts; 199 200 #define CACHE_PATH_CUTOFF 35 201 202 static struct namecache * 203 cache_alloc(int len, int ts) 204 { 205 206 if (len > CACHE_PATH_CUTOFF) { 207 if (ts) 208 return (uma_zalloc(cache_zone_large_ts, M_WAITOK)); 209 else 210 return (uma_zalloc(cache_zone_large, M_WAITOK)); 211 } 212 if (ts) 213 return (uma_zalloc(cache_zone_small_ts, M_WAITOK)); 214 else 215 return (uma_zalloc(cache_zone_small, M_WAITOK)); 216 } 217 218 static void 219 cache_free(struct namecache *ncp) 220 { 221 int ts; 222 223 if (ncp == NULL) 224 return; 225 ts = ncp->nc_flag & NCF_TS; 226 if (ncp->nc_nlen <= CACHE_PATH_CUTOFF) { 227 if (ts) 228 uma_zfree(cache_zone_small_ts, ncp); 229 else 230 uma_zfree(cache_zone_small, ncp); 231 } else if (ts) 232 uma_zfree(cache_zone_large_ts, ncp); 233 else 234 uma_zfree(cache_zone_large, ncp); 235 } 236 237 static char * 238 nc_get_name(struct namecache *ncp) 239 { 240 struct namecache_ts *ncp_ts; 241 242 if ((ncp->nc_flag & NCF_TS) == 0) 243 return (ncp->nc_name); 244 ncp_ts = (struct namecache_ts *)ncp; 245 return (ncp_ts->nc_name); 246 } 247 248 static void 249 cache_out_ts(struct namecache *ncp, struct timespec *tsp, int *ticksp) 250 { 251 252 KASSERT((ncp->nc_flag & NCF_TS) != 0 || 253 (tsp == NULL && ticksp == NULL), 254 ("No NCF_TS")); 255 256 if (tsp != NULL) 257 *tsp = ((struct namecache_ts *)ncp)->nc_time; 258 if (ticksp != NULL) 259 *ticksp = ((struct namecache_ts *)ncp)->nc_ticks; 260 } 261 262 static int doingcache = 1; /* 1 => enable the cache */ 263 SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, 264 "VFS namecache enabled"); 265 266 /* Export size information to userland */ 267 SYSCTL_INT(_debug_sizeof, OID_AUTO, namecache, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, 268 sizeof(struct namecache), "sizeof(struct namecache)"); 269 270 /* 271 * The new name cache statistics 272 */ 273 static SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, 274 "Name cache statistics"); 275 #define STATNODE(mode, name, var, descr) \ 276 SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, descr); 277 STATNODE(CTLFLAG_RD, numneg, &numneg, "Number of negative cache entries"); 278 STATNODE(CTLFLAG_RD, numcache, &numcache, "Number of cache entries"); 279 static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls, 280 "Number of cache lookups"); 281 static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits, 282 "Number of '.' hits"); 283 static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits, 284 "Number of '..' hits"); 285 static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks, 286 "Number of checks in lookup"); 287 static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss, 288 "Number of cache misses"); 289 static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap, 290 "Number of cache misses we do not want to cache"); 291 static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps, 292 "Number of cache hits (positive) we do not want to cache"); 293 static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits, 294 "Number of cache hits (positive)"); 295 static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps, 296 "Number of cache hits (negative) we do not want to cache"); 297 static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits, 298 "Number of cache hits (negative)"); 299 static u_long numupgrades; STATNODE(CTLFLAG_RD, numupgrades, &numupgrades, 300 "Number of updates of the cache after lookup (write lock + retry)"); 301 302 SYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD | CTLFLAG_MPSAFE, 303 &nchstats, sizeof(nchstats), "LU", 304 "VFS cache effectiveness statistics"); 305 306 307 308 static void cache_zap(struct namecache *ncp); 309 static int vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf, 310 u_int *buflen); 311 static int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir, 312 char *buf, char **retbuf, u_int buflen); 313 314 static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries"); 315 316 #ifdef DIAGNOSTIC 317 /* 318 * Grab an atomic snapshot of the name cache hash chain lengths 319 */ 320 static SYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, 321 "hash table stats"); 322 323 static int 324 sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS) 325 { 326 int error; 327 struct nchashhead *ncpp; 328 struct namecache *ncp; 329 int n_nchash; 330 int count; 331 332 n_nchash = nchash + 1; /* nchash is max index, not count */ 333 if (!req->oldptr) 334 return SYSCTL_OUT(req, 0, n_nchash * sizeof(int)); 335 336 /* Scan hash tables for applicable entries */ 337 for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { 338 CACHE_RLOCK(); 339 count = 0; 340 LIST_FOREACH(ncp, ncpp, nc_hash) { 341 count++; 342 } 343 CACHE_RUNLOCK(); 344 error = SYSCTL_OUT(req, &count, sizeof(count)); 345 if (error) 346 return (error); 347 } 348 return (0); 349 } 350 SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD| 351 CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_rawnchash, "S,int", 352 "nchash chain lengths"); 353 354 static int 355 sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS) 356 { 357 int error; 358 struct nchashhead *ncpp; 359 struct namecache *ncp; 360 int n_nchash; 361 int count, maxlength, used, pct; 362 363 if (!req->oldptr) 364 return SYSCTL_OUT(req, 0, 4 * sizeof(int)); 365 366 n_nchash = nchash + 1; /* nchash is max index, not count */ 367 used = 0; 368 maxlength = 0; 369 370 /* Scan hash tables for applicable entries */ 371 for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { 372 count = 0; 373 CACHE_RLOCK(); 374 LIST_FOREACH(ncp, ncpp, nc_hash) { 375 count++; 376 } 377 CACHE_RUNLOCK(); 378 if (count) 379 used++; 380 if (maxlength < count) 381 maxlength = count; 382 } 383 n_nchash = nchash + 1; 384 pct = (used * 100) / (n_nchash / 100); 385 error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash)); 386 if (error) 387 return (error); 388 error = SYSCTL_OUT(req, &used, sizeof(used)); 389 if (error) 390 return (error); 391 error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength)); 392 if (error) 393 return (error); 394 error = SYSCTL_OUT(req, &pct, sizeof(pct)); 395 if (error) 396 return (error); 397 return (0); 398 } 399 SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD| 400 CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_nchash, "I", 401 "nchash statistics (number of total/used buckets, maximum chain length, usage percentage)"); 402 #endif 403 404 /* 405 * cache_zap(): 406 * 407 * Removes a namecache entry from cache, whether it contains an actual 408 * pointer to a vnode or if it is just a negative cache entry. 409 */ 410 static void 411 cache_zap(ncp) 412 struct namecache *ncp; 413 { 414 struct vnode *vp; 415 416 rw_assert(&cache_lock, RA_WLOCKED); 417 CTR2(KTR_VFS, "cache_zap(%p) vp %p", ncp, ncp->nc_vp); 418 #ifdef KDTRACE_HOOKS 419 if (ncp->nc_vp != NULL) { 420 SDT_PROBE(vfs, namecache, zap, done, ncp->nc_dvp, 421 nc_get_name(ncp), ncp->nc_vp, 0, 0); 422 } else { 423 SDT_PROBE(vfs, namecache, zap_negative, done, ncp->nc_dvp, 424 nc_get_name(ncp), 0, 0, 0); 425 } 426 #endif 427 vp = NULL; 428 LIST_REMOVE(ncp, nc_hash); 429 if (ncp->nc_flag & NCF_ISDOTDOT) { 430 if (ncp == ncp->nc_dvp->v_cache_dd) 431 ncp->nc_dvp->v_cache_dd = NULL; 432 } else { 433 LIST_REMOVE(ncp, nc_src); 434 if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) { 435 vp = ncp->nc_dvp; 436 numcachehv--; 437 } 438 } 439 if (ncp->nc_vp) { 440 TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst); 441 if (ncp == ncp->nc_vp->v_cache_dd) 442 ncp->nc_vp->v_cache_dd = NULL; 443 } else { 444 TAILQ_REMOVE(&ncneg, ncp, nc_dst); 445 numneg--; 446 } 447 numcache--; 448 cache_free(ncp); 449 if (vp) 450 vdrop(vp); 451 } 452 453 /* 454 * Lookup an entry in the cache 455 * 456 * Lookup is called with dvp pointing to the directory to search, 457 * cnp pointing to the name of the entry being sought. If the lookup 458 * succeeds, the vnode is returned in *vpp, and a status of -1 is 459 * returned. If the lookup determines that the name does not exist 460 * (negative cacheing), a status of ENOENT is returned. If the lookup 461 * fails, a status of zero is returned. If the directory vnode is 462 * recycled out from under us due to a forced unmount, a status of 463 * ENOENT is returned. 464 * 465 * vpp is locked and ref'd on return. If we're looking up DOTDOT, dvp is 466 * unlocked. If we're looking up . an extra ref is taken, but the lock is 467 * not recursively acquired. 468 */ 469 470 int 471 cache_lookup(dvp, vpp, cnp, tsp, ticksp) 472 struct vnode *dvp; 473 struct vnode **vpp; 474 struct componentname *cnp; 475 struct timespec *tsp; 476 int *ticksp; 477 { 478 struct namecache *ncp; 479 uint32_t hash; 480 int error, ltype, wlocked; 481 482 if (!doingcache) { 483 cnp->cn_flags &= ~MAKEENTRY; 484 return (0); 485 } 486 retry: 487 CACHE_RLOCK(); 488 wlocked = 0; 489 numcalls++; 490 error = 0; 491 492 retry_wlocked: 493 if (cnp->cn_nameptr[0] == '.') { 494 if (cnp->cn_namelen == 1) { 495 *vpp = dvp; 496 CTR2(KTR_VFS, "cache_lookup(%p, %s) found via .", 497 dvp, cnp->cn_nameptr); 498 dothits++; 499 SDT_PROBE(vfs, namecache, lookup, hit, dvp, ".", 500 *vpp, 0, 0); 501 if (tsp != NULL) 502 timespecclear(tsp); 503 if (ticksp != NULL) 504 *ticksp = ticks; 505 goto success; 506 } 507 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 508 dotdothits++; 509 if (dvp->v_cache_dd == NULL) { 510 SDT_PROBE(vfs, namecache, lookup, miss, dvp, 511 "..", NULL, 0, 0); 512 goto unlock; 513 } 514 if ((cnp->cn_flags & MAKEENTRY) == 0) { 515 if (!wlocked && !CACHE_UPGRADE_LOCK()) 516 goto wlock; 517 if (dvp->v_cache_dd->nc_flag & NCF_ISDOTDOT) 518 cache_zap(dvp->v_cache_dd); 519 dvp->v_cache_dd = NULL; 520 CACHE_WUNLOCK(); 521 return (0); 522 } 523 ncp = dvp->v_cache_dd; 524 if (ncp->nc_flag & NCF_ISDOTDOT) 525 *vpp = ncp->nc_vp; 526 else 527 *vpp = ncp->nc_dvp; 528 /* Return failure if negative entry was found. */ 529 if (*vpp == NULL) 530 goto negative_success; 531 CTR3(KTR_VFS, "cache_lookup(%p, %s) found %p via ..", 532 dvp, cnp->cn_nameptr, *vpp); 533 SDT_PROBE(vfs, namecache, lookup, hit, dvp, "..", 534 *vpp, 0, 0); 535 cache_out_ts(ncp, tsp, ticksp); 536 if ((ncp->nc_flag & (NCF_ISDOTDOT | NCF_DTS)) == 537 NCF_DTS && tsp != NULL) 538 *tsp = ((struct namecache_ts *)ncp)-> 539 nc_dotdottime; 540 goto success; 541 } 542 } 543 544 hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT); 545 hash = fnv_32_buf(&dvp, sizeof(dvp), hash); 546 LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) { 547 numchecks++; 548 if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen && 549 !bcmp(nc_get_name(ncp), cnp->cn_nameptr, ncp->nc_nlen)) 550 break; 551 } 552 553 /* We failed to find an entry */ 554 if (ncp == NULL) { 555 SDT_PROBE(vfs, namecache, lookup, miss, dvp, cnp->cn_nameptr, 556 NULL, 0, 0); 557 if ((cnp->cn_flags & MAKEENTRY) == 0) { 558 nummisszap++; 559 } else { 560 nummiss++; 561 } 562 nchstats.ncs_miss++; 563 goto unlock; 564 } 565 566 /* We don't want to have an entry, so dump it */ 567 if ((cnp->cn_flags & MAKEENTRY) == 0) { 568 numposzaps++; 569 nchstats.ncs_badhits++; 570 if (!wlocked && !CACHE_UPGRADE_LOCK()) 571 goto wlock; 572 cache_zap(ncp); 573 CACHE_WUNLOCK(); 574 return (0); 575 } 576 577 /* We found a "positive" match, return the vnode */ 578 if (ncp->nc_vp) { 579 numposhits++; 580 nchstats.ncs_goodhits++; 581 *vpp = ncp->nc_vp; 582 CTR4(KTR_VFS, "cache_lookup(%p, %s) found %p via ncp %p", 583 dvp, cnp->cn_nameptr, *vpp, ncp); 584 SDT_PROBE(vfs, namecache, lookup, hit, dvp, nc_get_name(ncp), 585 *vpp, 0, 0); 586 cache_out_ts(ncp, tsp, ticksp); 587 goto success; 588 } 589 590 negative_success: 591 /* We found a negative match, and want to create it, so purge */ 592 if (cnp->cn_nameiop == CREATE) { 593 numnegzaps++; 594 nchstats.ncs_badhits++; 595 if (!wlocked && !CACHE_UPGRADE_LOCK()) 596 goto wlock; 597 cache_zap(ncp); 598 CACHE_WUNLOCK(); 599 return (0); 600 } 601 602 if (!wlocked && !CACHE_UPGRADE_LOCK()) 603 goto wlock; 604 numneghits++; 605 /* 606 * We found a "negative" match, so we shift it to the end of 607 * the "negative" cache entries queue to satisfy LRU. Also, 608 * check to see if the entry is a whiteout; indicate this to 609 * the componentname, if so. 610 */ 611 TAILQ_REMOVE(&ncneg, ncp, nc_dst); 612 TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 613 nchstats.ncs_neghits++; 614 if (ncp->nc_flag & NCF_WHITE) 615 cnp->cn_flags |= ISWHITEOUT; 616 SDT_PROBE(vfs, namecache, lookup, hit__negative, dvp, nc_get_name(ncp), 617 0, 0, 0); 618 cache_out_ts(ncp, tsp, ticksp); 619 CACHE_WUNLOCK(); 620 return (ENOENT); 621 622 wlock: 623 /* 624 * We need to update the cache after our lookup, so upgrade to 625 * a write lock and retry the operation. 626 */ 627 CACHE_RUNLOCK(); 628 CACHE_WLOCK(); 629 numupgrades++; 630 wlocked = 1; 631 goto retry_wlocked; 632 633 success: 634 /* 635 * On success we return a locked and ref'd vnode as per the lookup 636 * protocol. 637 */ 638 if (dvp == *vpp) { /* lookup on "." */ 639 VREF(*vpp); 640 if (wlocked) 641 CACHE_WUNLOCK(); 642 else 643 CACHE_RUNLOCK(); 644 /* 645 * When we lookup "." we still can be asked to lock it 646 * differently... 647 */ 648 ltype = cnp->cn_lkflags & LK_TYPE_MASK; 649 if (ltype != VOP_ISLOCKED(*vpp)) { 650 if (ltype == LK_EXCLUSIVE) { 651 vn_lock(*vpp, LK_UPGRADE | LK_RETRY); 652 if ((*vpp)->v_iflag & VI_DOOMED) { 653 /* forced unmount */ 654 vrele(*vpp); 655 *vpp = NULL; 656 return (ENOENT); 657 } 658 } else 659 vn_lock(*vpp, LK_DOWNGRADE | LK_RETRY); 660 } 661 return (-1); 662 } 663 ltype = 0; /* silence gcc warning */ 664 if (cnp->cn_flags & ISDOTDOT) { 665 ltype = VOP_ISLOCKED(dvp); 666 VOP_UNLOCK(dvp, 0); 667 } 668 VI_LOCK(*vpp); 669 if (wlocked) 670 CACHE_WUNLOCK(); 671 else 672 CACHE_RUNLOCK(); 673 error = vget(*vpp, cnp->cn_lkflags | LK_INTERLOCK, cnp->cn_thread); 674 if (cnp->cn_flags & ISDOTDOT) { 675 vn_lock(dvp, ltype | LK_RETRY); 676 if (dvp->v_iflag & VI_DOOMED) { 677 if (error == 0) 678 vput(*vpp); 679 *vpp = NULL; 680 return (ENOENT); 681 } 682 } 683 if (error) { 684 *vpp = NULL; 685 goto retry; 686 } 687 if ((cnp->cn_flags & ISLASTCN) && 688 (cnp->cn_lkflags & LK_TYPE_MASK) == LK_EXCLUSIVE) { 689 ASSERT_VOP_ELOCKED(*vpp, "cache_lookup"); 690 } 691 return (-1); 692 693 unlock: 694 if (wlocked) 695 CACHE_WUNLOCK(); 696 else 697 CACHE_RUNLOCK(); 698 return (0); 699 } 700 701 /* 702 * Add an entry to the cache. 703 */ 704 void 705 cache_enter_time(dvp, vp, cnp, tsp, dtsp) 706 struct vnode *dvp; 707 struct vnode *vp; 708 struct componentname *cnp; 709 struct timespec *tsp; 710 struct timespec *dtsp; 711 { 712 struct namecache *ncp, *n2; 713 struct namecache_ts *n3; 714 struct nchashhead *ncpp; 715 uint32_t hash; 716 int flag; 717 int hold; 718 int zap; 719 int len; 720 721 CTR3(KTR_VFS, "cache_enter(%p, %p, %s)", dvp, vp, cnp->cn_nameptr); 722 VNASSERT(vp == NULL || (vp->v_iflag & VI_DOOMED) == 0, vp, 723 ("cache_enter: Adding a doomed vnode")); 724 VNASSERT(dvp == NULL || (dvp->v_iflag & VI_DOOMED) == 0, dvp, 725 ("cache_enter: Doomed vnode used as src")); 726 727 if (!doingcache) 728 return; 729 730 /* 731 * Avoid blowout in namecache entries. 732 */ 733 if (numcache >= desiredvnodes * ncsizefactor) 734 return; 735 736 flag = 0; 737 if (cnp->cn_nameptr[0] == '.') { 738 if (cnp->cn_namelen == 1) 739 return; 740 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 741 CACHE_WLOCK(); 742 /* 743 * If dotdot entry already exists, just retarget it 744 * to new parent vnode, otherwise continue with new 745 * namecache entry allocation. 746 */ 747 if ((ncp = dvp->v_cache_dd) != NULL && 748 ncp->nc_flag & NCF_ISDOTDOT) { 749 KASSERT(ncp->nc_dvp == dvp, 750 ("wrong isdotdot parent")); 751 if (ncp->nc_vp != NULL) { 752 TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, 753 ncp, nc_dst); 754 } else { 755 TAILQ_REMOVE(&ncneg, ncp, nc_dst); 756 numneg--; 757 } 758 if (vp != NULL) { 759 TAILQ_INSERT_HEAD(&vp->v_cache_dst, 760 ncp, nc_dst); 761 } else { 762 TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 763 numneg++; 764 } 765 ncp->nc_vp = vp; 766 CACHE_WUNLOCK(); 767 return; 768 } 769 dvp->v_cache_dd = NULL; 770 SDT_PROBE(vfs, namecache, enter, done, dvp, "..", vp, 771 0, 0); 772 CACHE_WUNLOCK(); 773 flag = NCF_ISDOTDOT; 774 } 775 } 776 777 hold = 0; 778 zap = 0; 779 780 /* 781 * Calculate the hash key and setup as much of the new 782 * namecache entry as possible before acquiring the lock. 783 */ 784 ncp = cache_alloc(cnp->cn_namelen, tsp != NULL); 785 ncp->nc_vp = vp; 786 ncp->nc_dvp = dvp; 787 ncp->nc_flag = flag; 788 if (tsp != NULL) { 789 n3 = (struct namecache_ts *)ncp; 790 n3->nc_time = *tsp; 791 n3->nc_ticks = ticks; 792 n3->nc_flag |= NCF_TS; 793 if (dtsp != NULL) { 794 n3->nc_dotdottime = *dtsp; 795 n3->nc_flag |= NCF_DTS; 796 } 797 } 798 len = ncp->nc_nlen = cnp->cn_namelen; 799 hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT); 800 strlcpy(nc_get_name(ncp), cnp->cn_nameptr, len + 1); 801 hash = fnv_32_buf(&dvp, sizeof(dvp), hash); 802 CACHE_WLOCK(); 803 804 /* 805 * See if this vnode or negative entry is already in the cache 806 * with this name. This can happen with concurrent lookups of 807 * the same path name. 808 */ 809 ncpp = NCHHASH(hash); 810 LIST_FOREACH(n2, ncpp, nc_hash) { 811 if (n2->nc_dvp == dvp && 812 n2->nc_nlen == cnp->cn_namelen && 813 !bcmp(nc_get_name(n2), cnp->cn_nameptr, n2->nc_nlen)) { 814 if (tsp != NULL) { 815 KASSERT((n2->nc_flag & NCF_TS) != 0, 816 ("no NCF_TS")); 817 n3 = (struct namecache_ts *)n2; 818 n3->nc_time = 819 ((struct namecache_ts *)ncp)->nc_time; 820 n3->nc_ticks = 821 ((struct namecache_ts *)ncp)->nc_ticks; 822 if (dtsp != NULL) { 823 n3->nc_dotdottime = 824 ((struct namecache_ts *)ncp)-> 825 nc_dotdottime; 826 n3->nc_flag |= NCF_DTS; 827 } 828 } 829 CACHE_WUNLOCK(); 830 cache_free(ncp); 831 return; 832 } 833 } 834 835 if (flag == NCF_ISDOTDOT) { 836 /* 837 * See if we are trying to add .. entry, but some other lookup 838 * has populated v_cache_dd pointer already. 839 */ 840 if (dvp->v_cache_dd != NULL) { 841 CACHE_WUNLOCK(); 842 cache_free(ncp); 843 return; 844 } 845 KASSERT(vp == NULL || vp->v_type == VDIR, 846 ("wrong vnode type %p", vp)); 847 dvp->v_cache_dd = ncp; 848 } 849 850 numcache++; 851 if (!vp) { 852 numneg++; 853 if (cnp->cn_flags & ISWHITEOUT) 854 ncp->nc_flag |= NCF_WHITE; 855 } else if (vp->v_type == VDIR) { 856 if (flag != NCF_ISDOTDOT) { 857 /* 858 * For this case, the cache entry maps both the 859 * directory name in it and the name ".." for the 860 * directory's parent. 861 */ 862 if ((n2 = vp->v_cache_dd) != NULL && 863 (n2->nc_flag & NCF_ISDOTDOT) != 0) 864 cache_zap(n2); 865 vp->v_cache_dd = ncp; 866 } 867 } else { 868 vp->v_cache_dd = NULL; 869 } 870 871 /* 872 * Insert the new namecache entry into the appropriate chain 873 * within the cache entries table. 874 */ 875 LIST_INSERT_HEAD(ncpp, ncp, nc_hash); 876 if (flag != NCF_ISDOTDOT) { 877 if (LIST_EMPTY(&dvp->v_cache_src)) { 878 hold = 1; 879 numcachehv++; 880 } 881 LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src); 882 } 883 884 /* 885 * If the entry is "negative", we place it into the 886 * "negative" cache queue, otherwise, we place it into the 887 * destination vnode's cache entries queue. 888 */ 889 if (vp) { 890 TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst); 891 SDT_PROBE(vfs, namecache, enter, done, dvp, nc_get_name(ncp), 892 vp, 0, 0); 893 } else { 894 TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 895 SDT_PROBE(vfs, namecache, enter_negative, done, dvp, 896 nc_get_name(ncp), 0, 0, 0); 897 } 898 if (numneg * ncnegfactor > numcache) { 899 ncp = TAILQ_FIRST(&ncneg); 900 KASSERT(ncp->nc_vp == NULL, ("ncp %p vp %p on ncneg", 901 ncp, ncp->nc_vp)); 902 zap = 1; 903 } 904 if (hold) 905 vhold(dvp); 906 if (zap) 907 cache_zap(ncp); 908 CACHE_WUNLOCK(); 909 } 910 911 /* 912 * Name cache initialization, from vfs_init() when we are booting 913 */ 914 static void 915 nchinit(void *dummy __unused) 916 { 917 918 TAILQ_INIT(&ncneg); 919 920 cache_zone_small = uma_zcreate("S VFS Cache", 921 sizeof(struct namecache) + CACHE_PATH_CUTOFF + 1, 922 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT); 923 cache_zone_small_ts = uma_zcreate("STS VFS Cache", 924 sizeof(struct namecache_ts) + CACHE_PATH_CUTOFF + 1, 925 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT); 926 cache_zone_large = uma_zcreate("L VFS Cache", 927 sizeof(struct namecache) + NAME_MAX + 1, 928 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT); 929 cache_zone_large_ts = uma_zcreate("LTS VFS Cache", 930 sizeof(struct namecache_ts) + NAME_MAX + 1, 931 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT); 932 933 nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash); 934 } 935 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL); 936 937 938 /* 939 * Invalidate all entries to a particular vnode. 940 */ 941 void 942 cache_purge(vp) 943 struct vnode *vp; 944 { 945 946 CTR1(KTR_VFS, "cache_purge(%p)", vp); 947 SDT_PROBE(vfs, namecache, purge, done, vp, 0, 0, 0, 0); 948 CACHE_WLOCK(); 949 while (!LIST_EMPTY(&vp->v_cache_src)) 950 cache_zap(LIST_FIRST(&vp->v_cache_src)); 951 while (!TAILQ_EMPTY(&vp->v_cache_dst)) 952 cache_zap(TAILQ_FIRST(&vp->v_cache_dst)); 953 if (vp->v_cache_dd != NULL) { 954 KASSERT(vp->v_cache_dd->nc_flag & NCF_ISDOTDOT, 955 ("lost dotdot link")); 956 cache_zap(vp->v_cache_dd); 957 } 958 KASSERT(vp->v_cache_dd == NULL, ("incomplete purge")); 959 CACHE_WUNLOCK(); 960 } 961 962 /* 963 * Invalidate all negative entries for a particular directory vnode. 964 */ 965 void 966 cache_purge_negative(vp) 967 struct vnode *vp; 968 { 969 struct namecache *cp, *ncp; 970 971 CTR1(KTR_VFS, "cache_purge_negative(%p)", vp); 972 SDT_PROBE(vfs, namecache, purge_negative, done, vp, 0, 0, 0, 0); 973 CACHE_WLOCK(); 974 LIST_FOREACH_SAFE(cp, &vp->v_cache_src, nc_src, ncp) { 975 if (cp->nc_vp == NULL) 976 cache_zap(cp); 977 } 978 CACHE_WUNLOCK(); 979 } 980 981 /* 982 * Flush all entries referencing a particular filesystem. 983 */ 984 void 985 cache_purgevfs(mp) 986 struct mount *mp; 987 { 988 struct nchashhead *ncpp; 989 struct namecache *ncp, *nnp; 990 991 /* Scan hash tables for applicable entries */ 992 SDT_PROBE(vfs, namecache, purgevfs, done, mp, 0, 0, 0, 0); 993 CACHE_WLOCK(); 994 for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) { 995 LIST_FOREACH_SAFE(ncp, ncpp, nc_hash, nnp) { 996 if (ncp->nc_dvp->v_mount == mp) 997 cache_zap(ncp); 998 } 999 } 1000 CACHE_WUNLOCK(); 1001 } 1002 1003 /* 1004 * Perform canonical checks and cache lookup and pass on to filesystem 1005 * through the vop_cachedlookup only if needed. 1006 */ 1007 1008 int 1009 vfs_cache_lookup(ap) 1010 struct vop_lookup_args /* { 1011 struct vnode *a_dvp; 1012 struct vnode **a_vpp; 1013 struct componentname *a_cnp; 1014 } */ *ap; 1015 { 1016 struct vnode *dvp; 1017 int error; 1018 struct vnode **vpp = ap->a_vpp; 1019 struct componentname *cnp = ap->a_cnp; 1020 struct ucred *cred = cnp->cn_cred; 1021 int flags = cnp->cn_flags; 1022 struct thread *td = cnp->cn_thread; 1023 1024 *vpp = NULL; 1025 dvp = ap->a_dvp; 1026 1027 if (dvp->v_type != VDIR) 1028 return (ENOTDIR); 1029 1030 if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && 1031 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 1032 return (EROFS); 1033 1034 error = VOP_ACCESS(dvp, VEXEC, cred, td); 1035 if (error) 1036 return (error); 1037 1038 error = cache_lookup(dvp, vpp, cnp, NULL, NULL); 1039 if (error == 0) 1040 return (VOP_CACHEDLOOKUP(dvp, vpp, cnp)); 1041 if (error == -1) 1042 return (0); 1043 return (error); 1044 } 1045 1046 1047 #ifndef _SYS_SYSPROTO_H_ 1048 struct __getcwd_args { 1049 u_char *buf; 1050 u_int buflen; 1051 }; 1052 #endif 1053 1054 /* 1055 * XXX All of these sysctls would probably be more productive dead. 1056 */ 1057 static int disablecwd; 1058 SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, 1059 "Disable the getcwd syscall"); 1060 1061 /* Implementation of the getcwd syscall. */ 1062 int 1063 sys___getcwd(td, uap) 1064 struct thread *td; 1065 struct __getcwd_args *uap; 1066 { 1067 1068 return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen)); 1069 } 1070 1071 int 1072 kern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg, u_int buflen) 1073 { 1074 char *bp, *tmpbuf; 1075 struct filedesc *fdp; 1076 struct vnode *cdir, *rdir; 1077 int error; 1078 1079 if (disablecwd) 1080 return (ENODEV); 1081 if (buflen < 2) 1082 return (EINVAL); 1083 if (buflen > MAXPATHLEN) 1084 buflen = MAXPATHLEN; 1085 1086 tmpbuf = malloc(buflen, M_TEMP, M_WAITOK); 1087 fdp = td->td_proc->p_fd; 1088 FILEDESC_SLOCK(fdp); 1089 cdir = fdp->fd_cdir; 1090 VREF(cdir); 1091 rdir = fdp->fd_rdir; 1092 VREF(rdir); 1093 FILEDESC_SUNLOCK(fdp); 1094 error = vn_fullpath1(td, cdir, rdir, tmpbuf, &bp, buflen); 1095 vrele(rdir); 1096 vrele(cdir); 1097 1098 if (!error) { 1099 if (bufseg == UIO_SYSSPACE) 1100 bcopy(bp, buf, strlen(bp) + 1); 1101 else 1102 error = copyout(bp, buf, strlen(bp) + 1); 1103 #ifdef KTRACE 1104 if (KTRPOINT(curthread, KTR_NAMEI)) 1105 ktrnamei(bp); 1106 #endif 1107 } 1108 free(tmpbuf, M_TEMP); 1109 return (error); 1110 } 1111 1112 /* 1113 * Thus begins the fullpath magic. 1114 */ 1115 1116 #undef STATNODE 1117 #define STATNODE(name, descr) \ 1118 static u_int name; \ 1119 SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, descr) 1120 1121 static int disablefullpath; 1122 SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0, 1123 "Disable the vn_fullpath function"); 1124 1125 /* These count for kern___getcwd(), too. */ 1126 STATNODE(numfullpathcalls, "Number of fullpath search calls"); 1127 STATNODE(numfullpathfail1, "Number of fullpath search errors (ENOTDIR)"); 1128 STATNODE(numfullpathfail2, 1129 "Number of fullpath search errors (VOP_VPTOCNP failures)"); 1130 STATNODE(numfullpathfail4, "Number of fullpath search errors (ENOMEM)"); 1131 STATNODE(numfullpathfound, "Number of successful fullpath calls"); 1132 1133 /* 1134 * Retrieve the full filesystem path that correspond to a vnode from the name 1135 * cache (if available) 1136 */ 1137 int 1138 vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) 1139 { 1140 char *buf; 1141 struct filedesc *fdp; 1142 struct vnode *rdir; 1143 int error; 1144 1145 if (disablefullpath) 1146 return (ENODEV); 1147 if (vn == NULL) 1148 return (EINVAL); 1149 1150 buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 1151 fdp = td->td_proc->p_fd; 1152 FILEDESC_SLOCK(fdp); 1153 rdir = fdp->fd_rdir; 1154 VREF(rdir); 1155 FILEDESC_SUNLOCK(fdp); 1156 error = vn_fullpath1(td, vn, rdir, buf, retbuf, MAXPATHLEN); 1157 vrele(rdir); 1158 1159 if (!error) 1160 *freebuf = buf; 1161 else 1162 free(buf, M_TEMP); 1163 return (error); 1164 } 1165 1166 /* 1167 * This function is similar to vn_fullpath, but it attempts to lookup the 1168 * pathname relative to the global root mount point. This is required for the 1169 * auditing sub-system, as audited pathnames must be absolute, relative to the 1170 * global root mount point. 1171 */ 1172 int 1173 vn_fullpath_global(struct thread *td, struct vnode *vn, 1174 char **retbuf, char **freebuf) 1175 { 1176 char *buf; 1177 int error; 1178 1179 if (disablefullpath) 1180 return (ENODEV); 1181 if (vn == NULL) 1182 return (EINVAL); 1183 buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 1184 error = vn_fullpath1(td, vn, rootvnode, buf, retbuf, MAXPATHLEN); 1185 if (!error) 1186 *freebuf = buf; 1187 else 1188 free(buf, M_TEMP); 1189 return (error); 1190 } 1191 1192 int 1193 vn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf, u_int *buflen) 1194 { 1195 int error; 1196 1197 CACHE_RLOCK(); 1198 error = vn_vptocnp_locked(vp, cred, buf, buflen); 1199 if (error == 0) 1200 CACHE_RUNLOCK(); 1201 return (error); 1202 } 1203 1204 static int 1205 vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf, 1206 u_int *buflen) 1207 { 1208 struct vnode *dvp; 1209 struct namecache *ncp; 1210 int error; 1211 1212 TAILQ_FOREACH(ncp, &((*vp)->v_cache_dst), nc_dst) { 1213 if ((ncp->nc_flag & NCF_ISDOTDOT) == 0) 1214 break; 1215 } 1216 if (ncp != NULL) { 1217 if (*buflen < ncp->nc_nlen) { 1218 CACHE_RUNLOCK(); 1219 vrele(*vp); 1220 numfullpathfail4++; 1221 error = ENOMEM; 1222 SDT_PROBE(vfs, namecache, fullpath, return, error, 1223 vp, NULL, 0, 0); 1224 return (error); 1225 } 1226 *buflen -= ncp->nc_nlen; 1227 memcpy(buf + *buflen, nc_get_name(ncp), ncp->nc_nlen); 1228 SDT_PROBE(vfs, namecache, fullpath, hit, ncp->nc_dvp, 1229 nc_get_name(ncp), vp, 0, 0); 1230 dvp = *vp; 1231 *vp = ncp->nc_dvp; 1232 vref(*vp); 1233 CACHE_RUNLOCK(); 1234 vrele(dvp); 1235 CACHE_RLOCK(); 1236 return (0); 1237 } 1238 SDT_PROBE(vfs, namecache, fullpath, miss, vp, 0, 0, 0, 0); 1239 1240 CACHE_RUNLOCK(); 1241 vn_lock(*vp, LK_SHARED | LK_RETRY); 1242 error = VOP_VPTOCNP(*vp, &dvp, cred, buf, buflen); 1243 vput(*vp); 1244 if (error) { 1245 numfullpathfail2++; 1246 SDT_PROBE(vfs, namecache, fullpath, return, error, vp, 1247 NULL, 0, 0); 1248 return (error); 1249 } 1250 1251 *vp = dvp; 1252 CACHE_RLOCK(); 1253 if (dvp->v_iflag & VI_DOOMED) { 1254 /* forced unmount */ 1255 CACHE_RUNLOCK(); 1256 vrele(dvp); 1257 error = ENOENT; 1258 SDT_PROBE(vfs, namecache, fullpath, return, error, vp, 1259 NULL, 0, 0); 1260 return (error); 1261 } 1262 /* 1263 * *vp has its use count incremented still. 1264 */ 1265 1266 return (0); 1267 } 1268 1269 /* 1270 * The magic behind kern___getcwd() and vn_fullpath(). 1271 */ 1272 static int 1273 vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir, 1274 char *buf, char **retbuf, u_int buflen) 1275 { 1276 int error, slash_prefixed; 1277 #ifdef KDTRACE_HOOKS 1278 struct vnode *startvp = vp; 1279 #endif 1280 struct vnode *vp1; 1281 1282 buflen--; 1283 buf[buflen] = '\0'; 1284 error = 0; 1285 slash_prefixed = 0; 1286 1287 SDT_PROBE(vfs, namecache, fullpath, entry, vp, 0, 0, 0, 0); 1288 numfullpathcalls++; 1289 vref(vp); 1290 CACHE_RLOCK(); 1291 if (vp->v_type != VDIR) { 1292 error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen); 1293 if (error) 1294 return (error); 1295 if (buflen == 0) { 1296 CACHE_RUNLOCK(); 1297 vrele(vp); 1298 return (ENOMEM); 1299 } 1300 buf[--buflen] = '/'; 1301 slash_prefixed = 1; 1302 } 1303 while (vp != rdir && vp != rootvnode) { 1304 if (vp->v_vflag & VV_ROOT) { 1305 if (vp->v_iflag & VI_DOOMED) { /* forced unmount */ 1306 CACHE_RUNLOCK(); 1307 vrele(vp); 1308 error = ENOENT; 1309 SDT_PROBE(vfs, namecache, fullpath, return, 1310 error, vp, NULL, 0, 0); 1311 break; 1312 } 1313 vp1 = vp->v_mount->mnt_vnodecovered; 1314 vref(vp1); 1315 CACHE_RUNLOCK(); 1316 vrele(vp); 1317 vp = vp1; 1318 CACHE_RLOCK(); 1319 continue; 1320 } 1321 if (vp->v_type != VDIR) { 1322 CACHE_RUNLOCK(); 1323 vrele(vp); 1324 numfullpathfail1++; 1325 error = ENOTDIR; 1326 SDT_PROBE(vfs, namecache, fullpath, return, 1327 error, vp, NULL, 0, 0); 1328 break; 1329 } 1330 error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen); 1331 if (error) 1332 break; 1333 if (buflen == 0) { 1334 CACHE_RUNLOCK(); 1335 vrele(vp); 1336 error = ENOMEM; 1337 SDT_PROBE(vfs, namecache, fullpath, return, error, 1338 startvp, NULL, 0, 0); 1339 break; 1340 } 1341 buf[--buflen] = '/'; 1342 slash_prefixed = 1; 1343 } 1344 if (error) 1345 return (error); 1346 if (!slash_prefixed) { 1347 if (buflen == 0) { 1348 CACHE_RUNLOCK(); 1349 vrele(vp); 1350 numfullpathfail4++; 1351 SDT_PROBE(vfs, namecache, fullpath, return, ENOMEM, 1352 startvp, NULL, 0, 0); 1353 return (ENOMEM); 1354 } 1355 buf[--buflen] = '/'; 1356 } 1357 numfullpathfound++; 1358 CACHE_RUNLOCK(); 1359 vrele(vp); 1360 1361 SDT_PROBE(vfs, namecache, fullpath, return, 0, startvp, buf + buflen, 1362 0, 0); 1363 *retbuf = buf + buflen; 1364 return (0); 1365 } 1366 1367 struct vnode * 1368 vn_dir_dd_ino(struct vnode *vp) 1369 { 1370 struct namecache *ncp; 1371 struct vnode *ddvp; 1372 1373 ASSERT_VOP_LOCKED(vp, "vn_dir_dd_ino"); 1374 CACHE_RLOCK(); 1375 TAILQ_FOREACH(ncp, &(vp->v_cache_dst), nc_dst) { 1376 if ((ncp->nc_flag & NCF_ISDOTDOT) != 0) 1377 continue; 1378 ddvp = ncp->nc_dvp; 1379 VI_LOCK(ddvp); 1380 CACHE_RUNLOCK(); 1381 if (vget(ddvp, LK_INTERLOCK | LK_SHARED | LK_NOWAIT, curthread)) 1382 return (NULL); 1383 return (ddvp); 1384 } 1385 CACHE_RUNLOCK(); 1386 return (NULL); 1387 } 1388 1389 int 1390 vn_commname(struct vnode *vp, char *buf, u_int buflen) 1391 { 1392 struct namecache *ncp; 1393 int l; 1394 1395 CACHE_RLOCK(); 1396 TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_dst) 1397 if ((ncp->nc_flag & NCF_ISDOTDOT) == 0) 1398 break; 1399 if (ncp == NULL) { 1400 CACHE_RUNLOCK(); 1401 return (ENOENT); 1402 } 1403 l = min(ncp->nc_nlen, buflen - 1); 1404 memcpy(buf, nc_get_name(ncp), l); 1405 CACHE_RUNLOCK(); 1406 buf[l] = '\0'; 1407 return (0); 1408 } 1409 1410 /* ABI compat shims for old kernel modules. */ 1411 #undef cache_enter 1412 1413 void cache_enter(struct vnode *dvp, struct vnode *vp, 1414 struct componentname *cnp); 1415 1416 void 1417 cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) 1418 { 1419 1420 cache_enter_time(dvp, vp, cnp, NULL, NULL); 1421 } 1422 1423 /* 1424 * This function updates path string to vnode's full global path 1425 * and checks the size of the new path string against the pathlen argument. 1426 * 1427 * Requires a locked, referenced vnode. 1428 * Vnode is re-locked on success or ENODEV, otherwise unlocked. 1429 * 1430 * If sysctl debug.disablefullpath is set, ENODEV is returned, 1431 * vnode is left locked and path remain untouched. 1432 * 1433 * If vp is a directory, the call to vn_fullpath_global() always succeeds 1434 * because it falls back to the ".." lookup if the namecache lookup fails. 1435 */ 1436 int 1437 vn_path_to_global_path(struct thread *td, struct vnode *vp, char *path, 1438 u_int pathlen) 1439 { 1440 struct nameidata nd; 1441 struct vnode *vp1; 1442 char *rpath, *fbuf; 1443 int error; 1444 1445 ASSERT_VOP_ELOCKED(vp, __func__); 1446 1447 /* Return ENODEV if sysctl debug.disablefullpath==1 */ 1448 if (disablefullpath) 1449 return (ENODEV); 1450 1451 /* Construct global filesystem path from vp. */ 1452 VOP_UNLOCK(vp, 0); 1453 error = vn_fullpath_global(td, vp, &rpath, &fbuf); 1454 1455 if (error != 0) { 1456 vrele(vp); 1457 return (error); 1458 } 1459 1460 if (strlen(rpath) >= pathlen) { 1461 vrele(vp); 1462 error = ENAMETOOLONG; 1463 goto out; 1464 } 1465 1466 /* 1467 * Re-lookup the vnode by path to detect a possible rename. 1468 * As a side effect, the vnode is relocked. 1469 * If vnode was renamed, return ENOENT. 1470 */ 1471 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, 1472 UIO_SYSSPACE, path, td); 1473 error = namei(&nd); 1474 if (error != 0) { 1475 vrele(vp); 1476 goto out; 1477 } 1478 NDFREE(&nd, NDF_ONLY_PNBUF); 1479 vp1 = nd.ni_vp; 1480 vrele(vp); 1481 if (vp1 == vp) 1482 strcpy(path, rpath); 1483 else { 1484 vput(vp1); 1485 error = ENOENT; 1486 } 1487 1488 out: 1489 free(fbuf, M_TEMP); 1490 return (error); 1491 } 1492