1 /*- 2 * Copyright (c) 1989, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Poul-Henning Kamp of the FreeBSD Project. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
31 * 32 * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include "opt_ktrace.h" 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/filedesc.h> 43 #include <sys/fnv_hash.h> 44 #include <sys/kernel.h> 45 #include <sys/lock.h> 46 #include <sys/malloc.h> 47 #include <sys/fcntl.h> 48 #include <sys/mount.h> 49 #include <sys/namei.h> 50 #include <sys/proc.h> 51 #include <sys/rwlock.h> 52 #include <sys/sdt.h> 53 #include <sys/syscallsubr.h> 54 #include <sys/sysctl.h> 55 #include <sys/sysproto.h> 56 #include <sys/vnode.h> 57 #ifdef KTRACE 58 #include <sys/ktrace.h> 59 #endif 60 61 #include <vm/uma.h> 62 63 SDT_PROVIDER_DECLARE(vfs); 64 SDT_PROBE_DEFINE3(vfs, namecache, enter, done, "struct vnode *", "char *", 65 "struct vnode *"); 66 SDT_PROBE_DEFINE2(vfs, namecache, enter_negative, done, "struct vnode *", 67 "char *"); 68 SDT_PROBE_DEFINE1(vfs, namecache, fullpath, entry, "struct vnode *"); 69 SDT_PROBE_DEFINE3(vfs, namecache, fullpath, hit, "struct vnode *", 70 "char *", "struct vnode *"); 71 SDT_PROBE_DEFINE1(vfs, namecache, fullpath, miss, "struct vnode *"); 72 SDT_PROBE_DEFINE3(vfs, namecache, fullpath, return, "int", 73 "struct vnode *", "char *"); 74 SDT_PROBE_DEFINE3(vfs, namecache, lookup, hit, "struct vnode *", "char *", 75 "struct vnode *"); 76 SDT_PROBE_DEFINE2(vfs, namecache, lookup, hit__negative, 77 "struct vnode *", "char *"); 78 SDT_PROBE_DEFINE2(vfs, namecache, lookup, miss, "struct vnode *", 79 "char *"); 80 SDT_PROBE_DEFINE1(vfs, namecache, purge, done, "struct vnode *"); 81 SDT_PROBE_DEFINE1(vfs, namecache, purge_negative, done, "struct vnode *"); 82 SDT_PROBE_DEFINE1(vfs, namecache, purgevfs, done, "struct mount *"); 83 SDT_PROBE_DEFINE3(vfs, namecache, zap, done, "struct vnode *", "char *", 84 "struct vnode *"); 85 SDT_PROBE_DEFINE2(vfs, namecache, zap_negative, done, "struct vnode *", 86 "char *"); 87 88 /* 89 * This structure describes the elements in the cache of 
recent 90 * names looked up by namei. 91 */ 92 93 struct namecache { 94 LIST_ENTRY(namecache) nc_hash; /* hash chain */ 95 LIST_ENTRY(namecache) nc_src; /* source vnode list */ 96 TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */ 97 struct vnode *nc_dvp; /* vnode of parent of name */ 98 struct vnode *nc_vp; /* vnode the name refers to */ 99 u_char nc_flag; /* flag bits */ 100 u_char nc_nlen; /* length of name */ 101 char nc_name[0]; /* segment name + nul */ 102 }; 103 104 /* 105 * struct namecache_ts repeats struct namecache layout up to the 106 * nc_nlen member. 107 * struct namecache_ts is used in place of struct namecache when time(s) need 108 * to be stored. The nc_dotdottime field is used when a cache entry is mapping 109 * both a non-dotdot directory name plus dotdot for the directory's 110 * parent. 111 */ 112 struct namecache_ts { 113 LIST_ENTRY(namecache) nc_hash; /* hash chain */ 114 LIST_ENTRY(namecache) nc_src; /* source vnode list */ 115 TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */ 116 struct vnode *nc_dvp; /* vnode of parent of name */ 117 struct vnode *nc_vp; /* vnode the name refers to */ 118 u_char nc_flag; /* flag bits */ 119 u_char nc_nlen; /* length of name */ 120 struct timespec nc_time; /* timespec provided by fs */ 121 struct timespec nc_dotdottime; /* dotdot timespec provided by fs */ 122 int nc_ticks; /* ticks value when entry was added */ 123 char nc_name[0]; /* segment name + nul */ 124 }; 125 126 /* 127 * Flags in namecache.nc_flag 128 */ 129 #define NCF_WHITE 0x01 130 #define NCF_ISDOTDOT 0x02 131 #define NCF_TS 0x04 132 #define NCF_DTS 0x08 133 134 /* 135 * Name caching works as follows: 136 * 137 * Names found by directory scans are retained in a cache 138 * for future reference. It is managed LRU, so frequently 139 * used names will hang around. Cache is indexed by hash value 140 * obtained from (vp, name) where vp refers to the directory 141 * containing name. 142 * 143 * If it is a "negative" entry, (i.e. 
for a name that is known NOT to 144 * exist) the vnode pointer will be NULL. 145 * 146 * Upon reaching the last segment of a path, if the reference 147 * is for DELETE, or NOCACHE is set (rewrite), and the 148 * name is located in the cache, it will be dropped. 149 */ 150 151 /* 152 * Structures associated with name cacheing. 153 */ 154 #define NCHHASH(hash) \ 155 (&nchashtbl[(hash) & nchash]) 156 static LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */ 157 static TAILQ_HEAD(, namecache) ncneg; /* Hash Table */ 158 static u_long nchash; /* size of hash table */ 159 SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, 160 "Size of namecache hash table"); 161 static u_long ncnegfactor = 16; /* ratio of negative entries */ 162 SYSCTL_ULONG(_vfs, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, 163 "Ratio of negative namecache entries"); 164 static u_long numneg; /* number of negative entries allocated */ 165 SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, 166 "Number of negative entries in namecache"); 167 static u_long numcache; /* number of cache entries allocated */ 168 SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, 169 "Number of namecache entries"); 170 static u_long numcachehv; /* number of cache entries with vnodes held */ 171 SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, 172 "Number of namecache entries with vnodes held"); 173 static u_int ncsizefactor = 2; 174 SYSCTL_UINT(_vfs, OID_AUTO, ncsizefactor, CTLFLAG_RW, &ncsizefactor, 0, 175 "Size factor for namecache"); 176 177 struct nchstats nchstats; /* cache effectiveness statistics */ 178 179 static struct rwlock cache_lock; 180 RW_SYSINIT(vfscache, &cache_lock, "Name Cache"); 181 182 #define CACHE_UPGRADE_LOCK() rw_try_upgrade(&cache_lock) 183 #define CACHE_RLOCK() rw_rlock(&cache_lock) 184 #define CACHE_RUNLOCK() rw_runlock(&cache_lock) 185 #define CACHE_WLOCK() rw_wlock(&cache_lock) 186 #define CACHE_WUNLOCK() 
rw_wunlock(&cache_lock) 187 188 /* 189 * UMA zones for the VFS cache. 190 * 191 * The small cache is used for entries with short names, which are the 192 * most common. The large cache is used for entries which are too big to 193 * fit in the small cache. 194 */ 195 static uma_zone_t cache_zone_small; 196 static uma_zone_t cache_zone_small_ts; 197 static uma_zone_t cache_zone_large; 198 static uma_zone_t cache_zone_large_ts; 199 200 #define CACHE_PATH_CUTOFF 35 201 202 static struct namecache * 203 cache_alloc(int len, int ts) 204 { 205 206 if (len > CACHE_PATH_CUTOFF) { 207 if (ts) 208 return (uma_zalloc(cache_zone_large_ts, M_WAITOK)); 209 else 210 return (uma_zalloc(cache_zone_large, M_WAITOK)); 211 } 212 if (ts) 213 return (uma_zalloc(cache_zone_small_ts, M_WAITOK)); 214 else 215 return (uma_zalloc(cache_zone_small, M_WAITOK)); 216 } 217 218 static void 219 cache_free(struct namecache *ncp) 220 { 221 int ts; 222 223 if (ncp == NULL) 224 return; 225 ts = ncp->nc_flag & NCF_TS; 226 if (ncp->nc_nlen <= CACHE_PATH_CUTOFF) { 227 if (ts) 228 uma_zfree(cache_zone_small_ts, ncp); 229 else 230 uma_zfree(cache_zone_small, ncp); 231 } else if (ts) 232 uma_zfree(cache_zone_large_ts, ncp); 233 else 234 uma_zfree(cache_zone_large, ncp); 235 } 236 237 static char * 238 nc_get_name(struct namecache *ncp) 239 { 240 struct namecache_ts *ncp_ts; 241 242 if ((ncp->nc_flag & NCF_TS) == 0) 243 return (ncp->nc_name); 244 ncp_ts = (struct namecache_ts *)ncp; 245 return (ncp_ts->nc_name); 246 } 247 248 static void 249 cache_out_ts(struct namecache *ncp, struct timespec *tsp, int *ticksp) 250 { 251 252 KASSERT((ncp->nc_flag & NCF_TS) != 0 || 253 (tsp == NULL && ticksp == NULL), 254 ("No NCF_TS")); 255 256 if (tsp != NULL) 257 *tsp = ((struct namecache_ts *)ncp)->nc_time; 258 if (ticksp != NULL) 259 *ticksp = ((struct namecache_ts *)ncp)->nc_ticks; 260 } 261 262 static int doingcache = 1; /* 1 => enable the cache */ 263 SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, 
264 "VFS namecache enabled"); 265 266 /* Export size information to userland */ 267 SYSCTL_INT(_debug_sizeof, OID_AUTO, namecache, CTLFLAG_RD, 0, 268 sizeof(struct namecache), "sizeof(struct namecache)"); 269 270 /* 271 * The new name cache statistics 272 */ 273 static SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, 274 "Name cache statistics"); 275 #define STATNODE(mode, name, var, descr) \ 276 SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, descr); 277 STATNODE(CTLFLAG_RD, numneg, &numneg, "Number of negative cache entries"); 278 STATNODE(CTLFLAG_RD, numcache, &numcache, "Number of cache entries"); 279 static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls, 280 "Number of cache lookups"); 281 static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits, 282 "Number of '.' hits"); 283 static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits, 284 "Number of '..' hits"); 285 static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks, 286 "Number of checks in lookup"); 287 static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss, 288 "Number of cache misses"); 289 static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap, 290 "Number of cache misses we do not want to cache"); 291 static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps, 292 "Number of cache hits (positive) we do not want to cache"); 293 static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits, 294 "Number of cache hits (positive)"); 295 static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps, 296 "Number of cache hits (negative) we do not want to cache"); 297 static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits, 298 "Number of cache hits (negative)"); 299 static u_long numupgrades; STATNODE(CTLFLAG_RD, numupgrades, &numupgrades, 300 "Number of updates of the cache after lookup (write lock + retry)"); 301 302 SYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD | 
CTLFLAG_MPSAFE, 303 &nchstats, sizeof(nchstats), "LU", 304 "VFS cache effectiveness statistics"); 305 306 307 308 static void cache_zap(struct namecache *ncp); 309 static int vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf, 310 u_int *buflen); 311 static int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir, 312 char *buf, char **retbuf, u_int buflen); 313 314 static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries"); 315 316 #ifdef DIAGNOSTIC 317 /* 318 * Grab an atomic snapshot of the name cache hash chain lengths 319 */ 320 static SYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, 321 "hash table stats"); 322 323 static int 324 sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS) 325 { 326 int error; 327 struct nchashhead *ncpp; 328 struct namecache *ncp; 329 int n_nchash; 330 int count; 331 332 n_nchash = nchash + 1; /* nchash is max index, not count */ 333 if (!req->oldptr) 334 return SYSCTL_OUT(req, 0, n_nchash * sizeof(int)); 335 336 /* Scan hash tables for applicable entries */ 337 for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { 338 CACHE_RLOCK(); 339 count = 0; 340 LIST_FOREACH(ncp, ncpp, nc_hash) { 341 count++; 342 } 343 CACHE_RUNLOCK(); 344 error = SYSCTL_OUT(req, &count, sizeof(count)); 345 if (error) 346 return (error); 347 } 348 return (0); 349 } 350 SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD| 351 CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_rawnchash, "S,int", 352 "nchash chain lengths"); 353 354 static int 355 sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS) 356 { 357 int error; 358 struct nchashhead *ncpp; 359 struct namecache *ncp; 360 int n_nchash; 361 int count, maxlength, used, pct; 362 363 if (!req->oldptr) 364 return SYSCTL_OUT(req, 0, 4 * sizeof(int)); 365 366 n_nchash = nchash + 1; /* nchash is max index, not count */ 367 used = 0; 368 maxlength = 0; 369 370 /* Scan hash tables for applicable entries */ 371 for (ncpp = nchashtbl; n_nchash > 0; 
n_nchash--, ncpp++) { 372 count = 0; 373 CACHE_RLOCK(); 374 LIST_FOREACH(ncp, ncpp, nc_hash) { 375 count++; 376 } 377 CACHE_RUNLOCK(); 378 if (count) 379 used++; 380 if (maxlength < count) 381 maxlength = count; 382 } 383 n_nchash = nchash + 1; 384 pct = (used * 100) / (n_nchash / 100); 385 error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash)); 386 if (error) 387 return (error); 388 error = SYSCTL_OUT(req, &used, sizeof(used)); 389 if (error) 390 return (error); 391 error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength)); 392 if (error) 393 return (error); 394 error = SYSCTL_OUT(req, &pct, sizeof(pct)); 395 if (error) 396 return (error); 397 return (0); 398 } 399 SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD| 400 CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_nchash, "I", 401 "nchash statistics (number of total/used buckets, maximum chain length, usage percentage)"); 402 #endif 403 404 /* 405 * cache_zap(): 406 * 407 * Removes a namecache entry from cache, whether it contains an actual 408 * pointer to a vnode or if it is just a negative cache entry. 
409 */ 410 static void 411 cache_zap(ncp) 412 struct namecache *ncp; 413 { 414 struct vnode *vp; 415 416 rw_assert(&cache_lock, RA_WLOCKED); 417 CTR2(KTR_VFS, "cache_zap(%p) vp %p", ncp, ncp->nc_vp); 418 #ifdef KDTRACE_HOOKS 419 if (ncp->nc_vp != NULL) { 420 SDT_PROBE(vfs, namecache, zap, done, ncp->nc_dvp, 421 nc_get_name(ncp), ncp->nc_vp, 0, 0); 422 } else { 423 SDT_PROBE(vfs, namecache, zap_negative, done, ncp->nc_dvp, 424 nc_get_name(ncp), 0, 0, 0); 425 } 426 #endif 427 vp = NULL; 428 LIST_REMOVE(ncp, nc_hash); 429 if (ncp->nc_flag & NCF_ISDOTDOT) { 430 if (ncp == ncp->nc_dvp->v_cache_dd) 431 ncp->nc_dvp->v_cache_dd = NULL; 432 } else { 433 LIST_REMOVE(ncp, nc_src); 434 if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) { 435 vp = ncp->nc_dvp; 436 numcachehv--; 437 } 438 } 439 if (ncp->nc_vp) { 440 TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst); 441 if (ncp == ncp->nc_vp->v_cache_dd) 442 ncp->nc_vp->v_cache_dd = NULL; 443 } else { 444 TAILQ_REMOVE(&ncneg, ncp, nc_dst); 445 numneg--; 446 } 447 numcache--; 448 cache_free(ncp); 449 if (vp) 450 vdrop(vp); 451 } 452 453 /* 454 * Lookup an entry in the cache 455 * 456 * Lookup is called with dvp pointing to the directory to search, 457 * cnp pointing to the name of the entry being sought. If the lookup 458 * succeeds, the vnode is returned in *vpp, and a status of -1 is 459 * returned. If the lookup determines that the name does not exist 460 * (negative cacheing), a status of ENOENT is returned. If the lookup 461 * fails, a status of zero is returned. If the directory vnode is 462 * recycled out from under us due to a forced unmount, a status of 463 * ENOENT is returned. 464 * 465 * vpp is locked and ref'd on return. If we're looking up DOTDOT, dvp is 466 * unlocked. If we're looking up . an extra ref is taken, but the lock is 467 * not recursively acquired. 
*/

/*
 * tsp and ticksp are optional outputs; when non-NULL they receive the
 * timestamp and ticks value recorded with the matching entry (or a cleared
 * timestamp and the current ticks for a "." hit).
 *
 * The search starts under the shared cache lock.  Any path that has to
 * modify the cache first tries to upgrade in place and, failing that, jumps
 * to "wlock", which takes the exclusive lock and restarts the search from
 * "retry_wlocked".
 */
int
cache_lookup(dvp, vpp, cnp, tsp, ticksp)
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
	struct timespec *tsp;
	int *ticksp;
{
	struct namecache *ncp;
	uint32_t hash;
	int error, ltype, wlocked;

	/* Cache disabled: also tell the caller not to enter the result. */
	if (!doingcache) {
		cnp->cn_flags &= ~MAKEENTRY;
		return (0);
	}
retry:
	CACHE_RLOCK();
	wlocked = 0;
	numcalls++;
	error = 0;

retry_wlocked:
	if (cnp->cn_nameptr[0] == '.') {
		/* "." resolves to the directory itself without a search. */
		if (cnp->cn_namelen == 1) {
			*vpp = dvp;
			CTR2(KTR_VFS, "cache_lookup(%p, %s) found via .",
			    dvp, cnp->cn_nameptr);
			dothits++;
			SDT_PROBE(vfs, namecache, lookup, hit, dvp, ".",
			    *vpp, 0, 0);
			if (tsp != NULL)
				timespecclear(tsp);
			if (ticksp != NULL)
				*ticksp = ticks;
			goto success;
		}
		/* ".." is answered from the directory's v_cache_dd slot. */
		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
			dotdothits++;
			if (dvp->v_cache_dd == NULL) {
				SDT_PROBE(vfs, namecache, lookup, miss, dvp,
				    "..", NULL, 0, 0);
				goto unlock;
			}
			/* Caller does not want an entry: drop the link. */
			if ((cnp->cn_flags & MAKEENTRY) == 0) {
				if (!wlocked && !CACHE_UPGRADE_LOCK())
					goto wlock;
				if (dvp->v_cache_dd->nc_flag & NCF_ISDOTDOT)
					cache_zap(dvp->v_cache_dd);
				dvp->v_cache_dd = NULL;
				CACHE_WUNLOCK();
				return (0);
			}
			ncp = dvp->v_cache_dd;
			/*
			 * A dedicated ".." entry names the parent in nc_vp;
			 * otherwise the entry is dvp's own name and the
			 * parent is its nc_dvp.
			 */
			if (ncp->nc_flag & NCF_ISDOTDOT)
				*vpp = ncp->nc_vp;
			else
				*vpp = ncp->nc_dvp;
			/* Return failure if negative entry was found. */
			if (*vpp == NULL)
				goto negative_success;
			CTR3(KTR_VFS, "cache_lookup(%p, %s) found %p via ..",
			    dvp, cnp->cn_nameptr, *vpp);
			SDT_PROBE(vfs, namecache, lookup, hit, dvp, "..",
			    *vpp, 0, 0);
			cache_out_ts(ncp, tsp, ticksp);
			/* Non-dotdot entry with a valid dotdot timestamp. */
			if ((ncp->nc_flag & (NCF_ISDOTDOT | NCF_DTS)) ==
			    NCF_DTS && tsp != NULL)
				*tsp = ((struct namecache_ts *)ncp)->
				    nc_dotdottime;
			goto success;
		}
	}

	/* Hash over the name bytes, then over the directory pointer value. */
	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
	hash = fnv_32_buf(&dvp, sizeof(dvp), hash);
	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
		numchecks++;
		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
		    !bcmp(nc_get_name(ncp), cnp->cn_nameptr, ncp->nc_nlen))
			break;
	}

	/* We failed to find an entry */
	if (ncp == NULL) {
		SDT_PROBE(vfs, namecache, lookup, miss, dvp, cnp->cn_nameptr,
		    NULL, 0, 0);
		if ((cnp->cn_flags & MAKEENTRY) == 0) {
			nummisszap++;
		} else {
			nummiss++;
		}
		nchstats.ncs_miss++;
		goto unlock;
	}

	/* We don't want to have an entry, so dump it */
	if ((cnp->cn_flags & MAKEENTRY) == 0) {
		numposzaps++;
		nchstats.ncs_badhits++;
		if (!wlocked && !CACHE_UPGRADE_LOCK())
			goto wlock;
		cache_zap(ncp);
		CACHE_WUNLOCK();
		return (0);
	}

	/* We found a "positive" match, return the vnode */
	if (ncp->nc_vp) {
		numposhits++;
		nchstats.ncs_goodhits++;
		*vpp = ncp->nc_vp;
		CTR4(KTR_VFS, "cache_lookup(%p, %s) found %p via ncp %p",
		    dvp, cnp->cn_nameptr, *vpp, ncp);
		SDT_PROBE(vfs, namecache, lookup, hit, dvp, nc_get_name(ncp),
		    *vpp, 0, 0);
		cache_out_ts(ncp, tsp, ticksp);
		goto success;
	}

negative_success:
	/* We found a negative match, and want to create it, so purge */
	if (cnp->cn_nameiop == CREATE) {
		numnegzaps++;
		nchstats.ncs_badhits++;
		if (!wlocked && !CACHE_UPGRADE_LOCK())
			goto wlock;
		cache_zap(ncp);
		CACHE_WUNLOCK();
		return (0);
	}

	/* Requeueing below modifies ncneg, so the write lock is needed. */
	if (!wlocked && !CACHE_UPGRADE_LOCK())
		goto wlock;
	numneghits++;
	/*
	 * We found a "negative" match, so we shift it to the end of
	 * the "negative" cache entries queue to satisfy LRU.  Also,
	 * check to see if the entry is a whiteout; indicate this to
	 * the componentname, if so.
	 */
	TAILQ_REMOVE(&ncneg, ncp, nc_dst);
	TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
	nchstats.ncs_neghits++;
	if (ncp->nc_flag & NCF_WHITE)
		cnp->cn_flags |= ISWHITEOUT;
	SDT_PROBE(vfs, namecache, lookup, hit__negative, dvp, nc_get_name(ncp),
	    0, 0, 0);
	cache_out_ts(ncp, tsp, ticksp);
	CACHE_WUNLOCK();
	return (ENOENT);

wlock:
	/*
	 * We need to update the cache after our lookup, so upgrade to
	 * a write lock and retry the operation.
	 */
	CACHE_RUNLOCK();
	CACHE_WLOCK();
	numupgrades++;
	wlocked = 1;
	goto retry_wlocked;

success:
	/*
	 * On success we return a locked and ref'd vnode as per the lookup
	 * protocol.
	 */
	if (dvp == *vpp) {   /* lookup on "." */
		VREF(*vpp);
		if (wlocked)
			CACHE_WUNLOCK();
		else
			CACHE_RUNLOCK();
		/*
		 * When we lookup "." we still can be asked to lock it
		 * differently...
		 */
		ltype = cnp->cn_lkflags & LK_TYPE_MASK;
		if (ltype != VOP_ISLOCKED(*vpp)) {
			if (ltype == LK_EXCLUSIVE) {
				vn_lock(*vpp, LK_UPGRADE | LK_RETRY);
				if ((*vpp)->v_iflag & VI_DOOMED) {
					/* forced unmount */
					vrele(*vpp);
					*vpp = NULL;
					return (ENOENT);
				}
			} else
				vn_lock(*vpp, LK_DOWNGRADE | LK_RETRY);
		}
		return (-1);
	}
	ltype = 0;	/* silence gcc warning */
	/* For "..", dvp is unlocked around vget() and relocked afterwards. */
	if (cnp->cn_flags & ISDOTDOT) {
		ltype = VOP_ISLOCKED(dvp);
		VOP_UNLOCK(dvp, 0);
	}
	/* Take the vnode interlock before dropping the cache lock. */
	VI_LOCK(*vpp);
	if (wlocked)
		CACHE_WUNLOCK();
	else
		CACHE_RUNLOCK();
	error = vget(*vpp, cnp->cn_lkflags | LK_INTERLOCK, cnp->cn_thread);
	if (cnp->cn_flags & ISDOTDOT) {
		vn_lock(dvp, ltype | LK_RETRY);
		if (dvp->v_iflag & VI_DOOMED) {
			if (error == 0)
				vput(*vpp);
			*vpp = NULL;
			return (ENOENT);
		}
	}
	/* vget() failed: start the whole lookup over. */
	if (error) {
		*vpp = NULL;
		goto retry;
	}
	if ((cnp->cn_flags & ISLASTCN) &&
	    (cnp->cn_lkflags & LK_TYPE_MASK) == LK_EXCLUSIVE) {
		ASSERT_VOP_ELOCKED(*vpp, "cache_lookup");
	}
	return (-1);

unlock:
	if (wlocked)
		CACHE_WUNLOCK();
	else
		CACHE_RUNLOCK();
	return (0);
}

/*
 * Add an entry to the cache.
 *
 * dvp is the directory, cnp the component name and vp the vnode the name
 * resolves to; a NULL vp creates a negative entry.  When tsp is non-NULL a
 * timestamped entry is created, and dtsp (if non-NULL) additionally records
 * a dotdot timestamp.  "." is never entered.
 */
void
cache_enter_time(dvp, vp, cnp, tsp, dtsp)
	struct vnode *dvp;
	struct vnode *vp;
	struct componentname *cnp;
	struct timespec *tsp;
	struct timespec *dtsp;
{
	struct namecache *ncp, *n2;
	struct namecache_ts *n3;
	struct nchashhead *ncpp;
	uint32_t hash;
	int flag;
	int hold;
	int zap;
	int len;

	CTR3(KTR_VFS, "cache_enter(%p, %p, %s)", dvp, vp, cnp->cn_nameptr);
	VNASSERT(vp == NULL || (vp->v_iflag & VI_DOOMED) == 0, vp,
	    ("cache_enter: Adding a doomed vnode"));
	VNASSERT(dvp == NULL || (dvp->v_iflag & VI_DOOMED) == 0, dvp,
	    ("cache_enter: Doomed vnode used as src"));

	if (!doingcache)
		return;

	/*
	 * Avoid blowout in namecache entries.
	 */
	if (numcache >= desiredvnodes * ncsizefactor)
		return;

	flag = 0;
	if (cnp->cn_nameptr[0] == '.') {
		if (cnp->cn_namelen == 1)
			return;
		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
			CACHE_WLOCK();
			/*
			 * If dotdot entry already exists, just retarget it
			 * to new parent vnode, otherwise continue with new
			 * namecache entry allocation.
			 */
			/*
			 * NOTE(review): numneg is not adjusted here when the
			 * entry changes between negative and positive --
			 * confirm whether that is intended.
			 */
			if ((ncp = dvp->v_cache_dd) != NULL &&
			    ncp->nc_flag & NCF_ISDOTDOT) {
				KASSERT(ncp->nc_dvp == dvp,
				    ("wrong isdotdot parent"));
				if (ncp->nc_vp != NULL)
					TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst,
					    ncp, nc_dst);
				else
					TAILQ_REMOVE(&ncneg, ncp, nc_dst);
				if (vp != NULL)
					TAILQ_INSERT_HEAD(&vp->v_cache_dst,
					    ncp, nc_dst);
				else
					TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
				ncp->nc_vp = vp;
				CACHE_WUNLOCK();
				return;
			}
			dvp->v_cache_dd = NULL;
			SDT_PROBE(vfs, namecache, enter, done, dvp, "..", vp,
			    0, 0);
			/* Lock is dropped across the sleeping allocation. */
			CACHE_WUNLOCK();
			flag = NCF_ISDOTDOT;
		}
	}

	hold = 0;
	zap = 0;

	/*
	 * Calculate the hash key and setup as much of the new
	 * namecache entry as possible before acquiring the lock.
	 */
	ncp = cache_alloc(cnp->cn_namelen, tsp != NULL);
	ncp->nc_vp = vp;
	ncp->nc_dvp = dvp;
	ncp->nc_flag = flag;
	if (tsp != NULL) {
		n3 = (struct namecache_ts *)ncp;
		n3->nc_time = *tsp;
		n3->nc_ticks = ticks;
		n3->nc_flag |= NCF_TS;
		if (dtsp != NULL) {
			n3->nc_dotdottime = *dtsp;
			n3->nc_flag |= NCF_DTS;
		}
	}
	len = ncp->nc_nlen = cnp->cn_namelen;
	hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
	/* NCF_TS must already be set so nc_get_name() picks the right offset. */
	strlcpy(nc_get_name(ncp), cnp->cn_nameptr, len + 1);
	hash = fnv_32_buf(&dvp, sizeof(dvp), hash);
	CACHE_WLOCK();

	/*
	 * See if this vnode or negative entry is already in the cache
	 * with this name.  This can happen with concurrent lookups of
	 * the same path name.
	 */
	ncpp = NCHHASH(hash);
	LIST_FOREACH(n2, ncpp, nc_hash) {
		if (n2->nc_dvp == dvp &&
		    n2->nc_nlen == cnp->cn_namelen &&
		    !bcmp(nc_get_name(n2), cnp->cn_nameptr, n2->nc_nlen)) {
			/* Refresh the existing entry's times, discard ours. */
			if (tsp != NULL) {
				KASSERT((n2->nc_flag & NCF_TS) != 0,
				    ("no NCF_TS"));
				n3 = (struct namecache_ts *)n2;
				n3->nc_time =
				    ((struct namecache_ts *)ncp)->nc_time;
				n3->nc_ticks =
				    ((struct namecache_ts *)ncp)->nc_ticks;
				if (dtsp != NULL) {
					n3->nc_dotdottime =
					    ((struct namecache_ts *)ncp)->
					    nc_dotdottime;
					n3->nc_flag |= NCF_DTS;
				}
			}
			CACHE_WUNLOCK();
			cache_free(ncp);
			return;
		}
	}

	if (flag == NCF_ISDOTDOT) {
		/*
		 * See if we are trying to add .. entry, but some other lookup
		 * has populated v_cache_dd pointer already.
		 */
		if (dvp->v_cache_dd != NULL) {
			CACHE_WUNLOCK();
			cache_free(ncp);
			return;
		}
		KASSERT(vp == NULL || vp->v_type == VDIR,
		    ("wrong vnode type %p", vp));
		dvp->v_cache_dd = ncp;
	}

	numcache++;
	if (!vp) {
		numneg++;
		if (cnp->cn_flags & ISWHITEOUT)
			ncp->nc_flag |= NCF_WHITE;
	} else if (vp->v_type == VDIR) {
		if (flag != NCF_ISDOTDOT) {
			/*
			 * For this case, the cache entry maps both the
			 * directory name in it and the name ".." for the
			 * directory's parent.
			 */
			if ((n2 = vp->v_cache_dd) != NULL &&
			    (n2->nc_flag & NCF_ISDOTDOT) != 0)
				cache_zap(n2);
			vp->v_cache_dd = ncp;
		}
	} else {
		vp->v_cache_dd = NULL;
	}

	/*
	 * Insert the new namecache entry into the appropriate chain
	 * within the cache entries table.
	 */
	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
	if (flag != NCF_ISDOTDOT) {
		/* First entry sourced from dvp: dvp gets held below. */
		if (LIST_EMPTY(&dvp->v_cache_src)) {
			hold = 1;
			numcachehv++;
		}
		LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
	}

	/*
	 * If the entry is "negative", we place it into the
	 * "negative" cache queue, otherwise, we place it into the
	 * destination vnode's cache entries queue.
	 */
	if (vp) {
		TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
		SDT_PROBE(vfs, namecache, enter, done, dvp, nc_get_name(ncp),
		    vp, 0, 0);
	} else {
		TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
		SDT_PROBE(vfs, namecache, enter_negative, done, dvp,
		    nc_get_name(ncp), 0, 0, 0);
	}
	/* Too many negative entries: evict the head of the negative queue. */
	if (numneg * ncnegfactor > numcache) {
		ncp = TAILQ_FIRST(&ncneg);
		zap = 1;
	}
	if (hold)
		vhold(dvp);
	if (zap)
		cache_zap(ncp);
	CACHE_WUNLOCK();
}

/*
 * Name cache initialization, from vfs_init() when we are booting
 *
 * Sets up the negative-entry queue, the four UMA zones (small/large name,
 * with and without timestamps) and the hash table.
 */
static void
nchinit(void *dummy __unused)
{

	TAILQ_INIT(&ncneg);

	cache_zone_small = uma_zcreate("S VFS Cache",
	    sizeof(struct namecache) + CACHE_PATH_CUTOFF + 1,
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
	cache_zone_small_ts = uma_zcreate("STS VFS Cache",
	    sizeof(struct namecache_ts) + CACHE_PATH_CUTOFF + 1,
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
	cache_zone_large = uma_zcreate("L VFS Cache",
	    sizeof(struct namecache) + NAME_MAX + 1,
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
	cache_zone_large_ts = uma_zcreate("LTS VFS Cache",
	    sizeof(struct namecache_ts) + NAME_MAX + 1,
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);

	/* hashinit() stores the resulting mask in nchash. */
	nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
}
SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL);


/*
 * Invalidate all entries to a particular vnode.
934 */ 935 void 936 cache_purge(vp) 937 struct vnode *vp; 938 { 939 940 CTR1(KTR_VFS, "cache_purge(%p)", vp); 941 SDT_PROBE(vfs, namecache, purge, done, vp, 0, 0, 0, 0); 942 CACHE_WLOCK(); 943 while (!LIST_EMPTY(&vp->v_cache_src)) 944 cache_zap(LIST_FIRST(&vp->v_cache_src)); 945 while (!TAILQ_EMPTY(&vp->v_cache_dst)) 946 cache_zap(TAILQ_FIRST(&vp->v_cache_dst)); 947 if (vp->v_cache_dd != NULL) { 948 KASSERT(vp->v_cache_dd->nc_flag & NCF_ISDOTDOT, 949 ("lost dotdot link")); 950 cache_zap(vp->v_cache_dd); 951 } 952 KASSERT(vp->v_cache_dd == NULL, ("incomplete purge")); 953 CACHE_WUNLOCK(); 954 } 955 956 /* 957 * Invalidate all negative entries for a particular directory vnode. 958 */ 959 void 960 cache_purge_negative(vp) 961 struct vnode *vp; 962 { 963 struct namecache *cp, *ncp; 964 965 CTR1(KTR_VFS, "cache_purge_negative(%p)", vp); 966 SDT_PROBE(vfs, namecache, purge_negative, done, vp, 0, 0, 0, 0); 967 CACHE_WLOCK(); 968 LIST_FOREACH_SAFE(cp, &vp->v_cache_src, nc_src, ncp) { 969 if (cp->nc_vp == NULL) 970 cache_zap(cp); 971 } 972 CACHE_WUNLOCK(); 973 } 974 975 /* 976 * Flush all entries referencing a particular filesystem. 977 */ 978 void 979 cache_purgevfs(mp) 980 struct mount *mp; 981 { 982 struct nchashhead *ncpp; 983 struct namecache *ncp, *nnp; 984 985 /* Scan hash tables for applicable entries */ 986 SDT_PROBE(vfs, namecache, purgevfs, done, mp, 0, 0, 0, 0); 987 CACHE_WLOCK(); 988 for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) { 989 LIST_FOREACH_SAFE(ncp, ncpp, nc_hash, nnp) { 990 if (ncp->nc_dvp->v_mount == mp) 991 cache_zap(ncp); 992 } 993 } 994 CACHE_WUNLOCK(); 995 } 996 997 /* 998 * Perform canonical checks and cache lookup and pass on to filesystem 999 * through the vop_cachedlookup only if needed. 
1000 */ 1001 1002 int 1003 vfs_cache_lookup(ap) 1004 struct vop_lookup_args /* { 1005 struct vnode *a_dvp; 1006 struct vnode **a_vpp; 1007 struct componentname *a_cnp; 1008 } */ *ap; 1009 { 1010 struct vnode *dvp; 1011 int error; 1012 struct vnode **vpp = ap->a_vpp; 1013 struct componentname *cnp = ap->a_cnp; 1014 struct ucred *cred = cnp->cn_cred; 1015 int flags = cnp->cn_flags; 1016 struct thread *td = cnp->cn_thread; 1017 1018 *vpp = NULL; 1019 dvp = ap->a_dvp; 1020 1021 if (dvp->v_type != VDIR) 1022 return (ENOTDIR); 1023 1024 if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && 1025 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 1026 return (EROFS); 1027 1028 error = VOP_ACCESS(dvp, VEXEC, cred, td); 1029 if (error) 1030 return (error); 1031 1032 error = cache_lookup(dvp, vpp, cnp, NULL, NULL); 1033 if (error == 0) 1034 return (VOP_CACHEDLOOKUP(dvp, vpp, cnp)); 1035 if (error == -1) 1036 return (0); 1037 return (error); 1038 } 1039 1040 1041 #ifndef _SYS_SYSPROTO_H_ 1042 struct __getcwd_args { 1043 u_char *buf; 1044 u_int buflen; 1045 }; 1046 #endif 1047 1048 /* 1049 * XXX All of these sysctls would probably be more productive dead. 1050 */ 1051 static int disablecwd; 1052 SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, 1053 "Disable the getcwd syscall"); 1054 1055 /* Implementation of the getcwd syscall. 
*/ 1056 int 1057 sys___getcwd(td, uap) 1058 struct thread *td; 1059 struct __getcwd_args *uap; 1060 { 1061 1062 return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen)); 1063 } 1064 1065 int 1066 kern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg, u_int buflen) 1067 { 1068 char *bp, *tmpbuf; 1069 struct filedesc *fdp; 1070 struct vnode *cdir, *rdir; 1071 int error; 1072 1073 if (disablecwd) 1074 return (ENODEV); 1075 if (buflen < 2) 1076 return (EINVAL); 1077 if (buflen > MAXPATHLEN) 1078 buflen = MAXPATHLEN; 1079 1080 tmpbuf = malloc(buflen, M_TEMP, M_WAITOK); 1081 fdp = td->td_proc->p_fd; 1082 FILEDESC_SLOCK(fdp); 1083 cdir = fdp->fd_cdir; 1084 VREF(cdir); 1085 rdir = fdp->fd_rdir; 1086 VREF(rdir); 1087 FILEDESC_SUNLOCK(fdp); 1088 error = vn_fullpath1(td, cdir, rdir, tmpbuf, &bp, buflen); 1089 vrele(rdir); 1090 vrele(cdir); 1091 1092 if (!error) { 1093 if (bufseg == UIO_SYSSPACE) 1094 bcopy(bp, buf, strlen(bp) + 1); 1095 else 1096 error = copyout(bp, buf, strlen(bp) + 1); 1097 #ifdef KTRACE 1098 if (KTRPOINT(curthread, KTR_NAMEI)) 1099 ktrnamei(bp); 1100 #endif 1101 } 1102 free(tmpbuf, M_TEMP); 1103 return (error); 1104 } 1105 1106 /* 1107 * Thus begins the fullpath magic. 1108 */ 1109 1110 #undef STATNODE 1111 #define STATNODE(name, descr) \ 1112 static u_int name; \ 1113 SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, descr) 1114 1115 static int disablefullpath; 1116 SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0, 1117 "Disable the vn_fullpath function"); 1118 1119 /* These count for kern___getcwd(), too. 
*/ 1120 STATNODE(numfullpathcalls, "Number of fullpath search calls"); 1121 STATNODE(numfullpathfail1, "Number of fullpath search errors (ENOTDIR)"); 1122 STATNODE(numfullpathfail2, 1123 "Number of fullpath search errors (VOP_VPTOCNP failures)"); 1124 STATNODE(numfullpathfail4, "Number of fullpath search errors (ENOMEM)"); 1125 STATNODE(numfullpathfound, "Number of successful fullpath calls"); 1126 1127 /* 1128 * Retrieve the full filesystem path that correspond to a vnode from the name 1129 * cache (if available) 1130 */ 1131 int 1132 vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) 1133 { 1134 char *buf; 1135 struct filedesc *fdp; 1136 struct vnode *rdir; 1137 int error; 1138 1139 if (disablefullpath) 1140 return (ENODEV); 1141 if (vn == NULL) 1142 return (EINVAL); 1143 1144 buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 1145 fdp = td->td_proc->p_fd; 1146 FILEDESC_SLOCK(fdp); 1147 rdir = fdp->fd_rdir; 1148 VREF(rdir); 1149 FILEDESC_SUNLOCK(fdp); 1150 error = vn_fullpath1(td, vn, rdir, buf, retbuf, MAXPATHLEN); 1151 vrele(rdir); 1152 1153 if (!error) 1154 *freebuf = buf; 1155 else 1156 free(buf, M_TEMP); 1157 return (error); 1158 } 1159 1160 /* 1161 * This function is similar to vn_fullpath, but it attempts to lookup the 1162 * pathname relative to the global root mount point. This is required for the 1163 * auditing sub-system, as audited pathnames must be absolute, relative to the 1164 * global root mount point. 
1165 */ 1166 int 1167 vn_fullpath_global(struct thread *td, struct vnode *vn, 1168 char **retbuf, char **freebuf) 1169 { 1170 char *buf; 1171 int error; 1172 1173 if (disablefullpath) 1174 return (ENODEV); 1175 if (vn == NULL) 1176 return (EINVAL); 1177 buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 1178 error = vn_fullpath1(td, vn, rootvnode, buf, retbuf, MAXPATHLEN); 1179 if (!error) 1180 *freebuf = buf; 1181 else 1182 free(buf, M_TEMP); 1183 return (error); 1184 } 1185 1186 int 1187 vn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf, u_int *buflen) 1188 { 1189 int error; 1190 1191 CACHE_RLOCK(); 1192 error = vn_vptocnp_locked(vp, cred, buf, buflen); 1193 if (error == 0) 1194 CACHE_RUNLOCK(); 1195 return (error); 1196 } 1197 1198 static int 1199 vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf, 1200 u_int *buflen) 1201 { 1202 struct vnode *dvp; 1203 struct namecache *ncp; 1204 int error; 1205 1206 TAILQ_FOREACH(ncp, &((*vp)->v_cache_dst), nc_dst) { 1207 if ((ncp->nc_flag & NCF_ISDOTDOT) == 0) 1208 break; 1209 } 1210 if (ncp != NULL) { 1211 if (*buflen < ncp->nc_nlen) { 1212 CACHE_RUNLOCK(); 1213 vrele(*vp); 1214 numfullpathfail4++; 1215 error = ENOMEM; 1216 SDT_PROBE(vfs, namecache, fullpath, return, error, 1217 vp, NULL, 0, 0); 1218 return (error); 1219 } 1220 *buflen -= ncp->nc_nlen; 1221 memcpy(buf + *buflen, nc_get_name(ncp), ncp->nc_nlen); 1222 SDT_PROBE(vfs, namecache, fullpath, hit, ncp->nc_dvp, 1223 nc_get_name(ncp), vp, 0, 0); 1224 dvp = *vp; 1225 *vp = ncp->nc_dvp; 1226 vref(*vp); 1227 CACHE_RUNLOCK(); 1228 vrele(dvp); 1229 CACHE_RLOCK(); 1230 return (0); 1231 } 1232 SDT_PROBE(vfs, namecache, fullpath, miss, vp, 0, 0, 0, 0); 1233 1234 CACHE_RUNLOCK(); 1235 vn_lock(*vp, LK_SHARED | LK_RETRY); 1236 error = VOP_VPTOCNP(*vp, &dvp, cred, buf, buflen); 1237 vput(*vp); 1238 if (error) { 1239 numfullpathfail2++; 1240 SDT_PROBE(vfs, namecache, fullpath, return, error, vp, 1241 NULL, 0, 0); 1242 return (error); 1243 } 1244 1245 *vp = dvp; 
1246 CACHE_RLOCK(); 1247 if (dvp->v_iflag & VI_DOOMED) { 1248 /* forced unmount */ 1249 CACHE_RUNLOCK(); 1250 vrele(dvp); 1251 error = ENOENT; 1252 SDT_PROBE(vfs, namecache, fullpath, return, error, vp, 1253 NULL, 0, 0); 1254 return (error); 1255 } 1256 /* 1257 * *vp has its use count incremented still. 1258 */ 1259 1260 return (0); 1261 } 1262 1263 /* 1264 * The magic behind kern___getcwd() and vn_fullpath(). 1265 */ 1266 static int 1267 vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir, 1268 char *buf, char **retbuf, u_int buflen) 1269 { 1270 int error, slash_prefixed; 1271 #ifdef KDTRACE_HOOKS 1272 struct vnode *startvp = vp; 1273 #endif 1274 struct vnode *vp1; 1275 1276 buflen--; 1277 buf[buflen] = '\0'; 1278 error = 0; 1279 slash_prefixed = 0; 1280 1281 SDT_PROBE(vfs, namecache, fullpath, entry, vp, 0, 0, 0, 0); 1282 numfullpathcalls++; 1283 vref(vp); 1284 CACHE_RLOCK(); 1285 if (vp->v_type != VDIR) { 1286 error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen); 1287 if (error) 1288 return (error); 1289 if (buflen == 0) { 1290 CACHE_RUNLOCK(); 1291 vrele(vp); 1292 return (ENOMEM); 1293 } 1294 buf[--buflen] = '/'; 1295 slash_prefixed = 1; 1296 } 1297 while (vp != rdir && vp != rootvnode) { 1298 if (vp->v_vflag & VV_ROOT) { 1299 if (vp->v_iflag & VI_DOOMED) { /* forced unmount */ 1300 CACHE_RUNLOCK(); 1301 vrele(vp); 1302 error = ENOENT; 1303 SDT_PROBE(vfs, namecache, fullpath, return, 1304 error, vp, NULL, 0, 0); 1305 break; 1306 } 1307 vp1 = vp->v_mount->mnt_vnodecovered; 1308 vref(vp1); 1309 CACHE_RUNLOCK(); 1310 vrele(vp); 1311 vp = vp1; 1312 CACHE_RLOCK(); 1313 continue; 1314 } 1315 if (vp->v_type != VDIR) { 1316 CACHE_RUNLOCK(); 1317 vrele(vp); 1318 numfullpathfail1++; 1319 error = ENOTDIR; 1320 SDT_PROBE(vfs, namecache, fullpath, return, 1321 error, vp, NULL, 0, 0); 1322 break; 1323 } 1324 error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen); 1325 if (error) 1326 break; 1327 if (buflen == 0) { 1328 CACHE_RUNLOCK(); 1329 
vrele(vp); 1330 error = ENOMEM; 1331 SDT_PROBE(vfs, namecache, fullpath, return, error, 1332 startvp, NULL, 0, 0); 1333 break; 1334 } 1335 buf[--buflen] = '/'; 1336 slash_prefixed = 1; 1337 } 1338 if (error) 1339 return (error); 1340 if (!slash_prefixed) { 1341 if (buflen == 0) { 1342 CACHE_RUNLOCK(); 1343 vrele(vp); 1344 numfullpathfail4++; 1345 SDT_PROBE(vfs, namecache, fullpath, return, ENOMEM, 1346 startvp, NULL, 0, 0); 1347 return (ENOMEM); 1348 } 1349 buf[--buflen] = '/'; 1350 } 1351 numfullpathfound++; 1352 CACHE_RUNLOCK(); 1353 vrele(vp); 1354 1355 SDT_PROBE(vfs, namecache, fullpath, return, 0, startvp, buf + buflen, 1356 0, 0); 1357 *retbuf = buf + buflen; 1358 return (0); 1359 } 1360 1361 struct vnode * 1362 vn_dir_dd_ino(struct vnode *vp) 1363 { 1364 struct namecache *ncp; 1365 struct vnode *ddvp; 1366 1367 ASSERT_VOP_LOCKED(vp, "vn_dir_dd_ino"); 1368 CACHE_RLOCK(); 1369 TAILQ_FOREACH(ncp, &(vp->v_cache_dst), nc_dst) { 1370 if ((ncp->nc_flag & NCF_ISDOTDOT) != 0) 1371 continue; 1372 ddvp = ncp->nc_dvp; 1373 VI_LOCK(ddvp); 1374 CACHE_RUNLOCK(); 1375 if (vget(ddvp, LK_INTERLOCK | LK_SHARED | LK_NOWAIT, curthread)) 1376 return (NULL); 1377 return (ddvp); 1378 } 1379 CACHE_RUNLOCK(); 1380 return (NULL); 1381 } 1382 1383 int 1384 vn_commname(struct vnode *vp, char *buf, u_int buflen) 1385 { 1386 struct namecache *ncp; 1387 int l; 1388 1389 CACHE_RLOCK(); 1390 TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_dst) 1391 if ((ncp->nc_flag & NCF_ISDOTDOT) == 0) 1392 break; 1393 if (ncp == NULL) { 1394 CACHE_RUNLOCK(); 1395 return (ENOENT); 1396 } 1397 l = min(ncp->nc_nlen, buflen - 1); 1398 memcpy(buf, nc_get_name(ncp), l); 1399 CACHE_RUNLOCK(); 1400 buf[l] = '\0'; 1401 return (0); 1402 } 1403 1404 /* ABI compat shims for old kernel modules. 
*/ 1405 #undef cache_enter 1406 1407 void cache_enter(struct vnode *dvp, struct vnode *vp, 1408 struct componentname *cnp); 1409 1410 void 1411 cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) 1412 { 1413 1414 cache_enter_time(dvp, vp, cnp, NULL, NULL); 1415 } 1416 1417 /* 1418 * This function updates path string to vnode's full global path 1419 * and checks the size of the new path string against the pathlen argument. 1420 * 1421 * Requires a locked, referenced vnode and GIANT lock held. 1422 * Vnode is re-locked on success or ENODEV, otherwise unlocked. 1423 * 1424 * If sysctl debug.disablefullpath is set, ENODEV is returned, 1425 * vnode is left locked and path remain untouched. 1426 * 1427 * If vp is a directory, the call to vn_fullpath_global() always succeeds 1428 * because it falls back to the ".." lookup if the namecache lookup fails. 1429 */ 1430 int 1431 vn_path_to_global_path(struct thread *td, struct vnode *vp, char *path, 1432 u_int pathlen) 1433 { 1434 struct nameidata nd; 1435 struct vnode *vp1; 1436 char *rpath, *fbuf; 1437 int error; 1438 1439 ASSERT_VOP_ELOCKED(vp, __func__); 1440 1441 /* Return ENODEV if sysctl debug.disablefullpath==1 */ 1442 if (disablefullpath) 1443 return (ENODEV); 1444 1445 /* Construct global filesystem path from vp. */ 1446 VOP_UNLOCK(vp, 0); 1447 error = vn_fullpath_global(td, vp, &rpath, &fbuf); 1448 1449 if (error != 0) { 1450 vrele(vp); 1451 return (error); 1452 } 1453 1454 if (strlen(rpath) >= pathlen) { 1455 vrele(vp); 1456 error = ENAMETOOLONG; 1457 goto out; 1458 } 1459 1460 /* 1461 * Re-lookup the vnode by path to detect a possible rename. 1462 * As a side effect, the vnode is relocked. 1463 * If vnode was renamed, return ENOENT. 
1464 */ 1465 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, 1466 UIO_SYSSPACE, path, td); 1467 error = namei(&nd); 1468 if (error != 0) { 1469 vrele(vp); 1470 goto out; 1471 } 1472 NDFREE(&nd, NDF_ONLY_PNBUF); 1473 vp1 = nd.ni_vp; 1474 vrele(vp); 1475 if (vp1 == vp) 1476 strcpy(path, rpath); 1477 else { 1478 vput(vp1); 1479 error = ENOENT; 1480 } 1481 1482 out: 1483 free(fbuf, M_TEMP); 1484 return (error); 1485 } 1486