1 /*- 2 * Copyright (c) 1989, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Poul-Henning Kamp of the FreeBSD Project. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include "opt_kdtrace.h" 39 #include "opt_ktrace.h" 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/filedesc.h> 44 #include <sys/fnv_hash.h> 45 #include <sys/kernel.h> 46 #include <sys/lock.h> 47 #include <sys/malloc.h> 48 #include <sys/fcntl.h> 49 #include <sys/mount.h> 50 #include <sys/namei.h> 51 #include <sys/proc.h> 52 #include <sys/rwlock.h> 53 #include <sys/sdt.h> 54 #include <sys/syscallsubr.h> 55 #include <sys/sysctl.h> 56 #include <sys/sysproto.h> 57 #include <sys/vnode.h> 58 #ifdef KTRACE 59 #include <sys/ktrace.h> 60 #endif 61 62 #include <vm/uma.h> 63 64 SDT_PROVIDER_DECLARE(vfs); 65 SDT_PROBE_DEFINE3(vfs, namecache, enter, done, done, "struct vnode *", "char *", 66 "struct vnode *"); 67 SDT_PROBE_DEFINE2(vfs, namecache, enter_negative, done, done, "struct vnode *", 68 "char *"); 69 SDT_PROBE_DEFINE1(vfs, namecache, fullpath, entry, entry, "struct vnode *"); 70 SDT_PROBE_DEFINE3(vfs, namecache, fullpath, hit, hit, "struct vnode *", 71 "struct char *", "struct vnode *"); 72 SDT_PROBE_DEFINE1(vfs, namecache, fullpath, miss, miss, "struct vnode *"); 73 SDT_PROBE_DEFINE3(vfs, namecache, fullpath, return, return, "int", 74 "struct vnode *", "struct char *"); 75 SDT_PROBE_DEFINE3(vfs, namecache, lookup, hit, hit, "struct vnode *", "char *", 76 "struct vnode *"); 77 SDT_PROBE_DEFINE2(vfs, namecache, lookup, hit_negative, hit-negative, 78 "struct vnode *", "char *"); 79 SDT_PROBE_DEFINE2(vfs, namecache, lookup, miss, miss, "struct vnode *", 80 "char *"); 81 SDT_PROBE_DEFINE1(vfs, namecache, purge, done, done, "struct vnode *"); 82 SDT_PROBE_DEFINE1(vfs, namecache, purge_negative, done, done, "struct vnode *"); 83 SDT_PROBE_DEFINE1(vfs, namecache, purgevfs, done, done, "struct mount *"); 84 SDT_PROBE_DEFINE3(vfs, namecache, zap, done, done, "struct vnode *", "char *", 85 "struct vnode *"); 86 SDT_PROBE_DEFINE2(vfs, namecache, zap_negative, done, done, "struct vnode *", 87 "char *"); 88 89 /* 90 * This structure describes the elements in the cache of recent 91 * names looked up by namei. 92 */ 93 94 struct namecache { 95 LIST_ENTRY(namecache) nc_hash; /* hash chain */ 96 LIST_ENTRY(namecache) nc_src; /* source vnode list */ 97 TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */ 98 struct vnode *nc_dvp; /* vnode of parent of name */ 99 struct vnode *nc_vp; /* vnode the name refers to */ 100 u_char nc_flag; /* flag bits */ 101 u_char nc_nlen; /* length of name */ 102 char nc_name[0]; /* segment name + nul */ 103 }; 104 105 /* 106 * struct namecache_ts repeats struct namecache layout up to the 107 * nc_nlen member. 108 */ 109 struct namecache_ts { 110 LIST_ENTRY(namecache) nc_hash; /* hash chain */ 111 LIST_ENTRY(namecache) nc_src; /* source vnode list */ 112 TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */ 113 struct vnode *nc_dvp; /* vnode of parent of name */ 114 struct vnode *nc_vp; /* vnode the name refers to */ 115 u_char nc_flag; /* flag bits */ 116 u_char nc_nlen; /* length of name */ 117 struct timespec nc_time; /* timespec provided by fs */ 118 int nc_ticks; /* ticks value when entry was added */ 119 char nc_name[0]; /* segment name + nul */ 120 }; 121 122 /* 123 * Flags in namecache.nc_flag 124 */ 125 #define NCF_WHITE 0x01 126 #define NCF_ISDOTDOT 0x02 127 #define NCF_TS 0x04 128 129 /* 130 * Name caching works as follows: 131 * 132 * Names found by directory scans are retained in a cache 133 * for future reference. It is managed LRU, so frequently 134 * used names will hang around. Cache is indexed by hash value 135 * obtained from (vp, name) where vp refers to the directory 136 * containing name. 137 * 138 * If it is a "negative" entry, (i.e. for a name that is known NOT to 139 * exist) the vnode pointer will be NULL. 140 * 141 * Upon reaching the last segment of a path, if the reference 142 * is for DELETE, or NOCACHE is set (rewrite), and the 143 * name is located in the cache, it will be dropped. 144 */ 145 146 /* 147 * Structures associated with name cacheing. 148 */ 149 #define NCHHASH(hash) \ 150 (&nchashtbl[(hash) & nchash]) 151 static LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */ 152 static TAILQ_HEAD(, namecache) ncneg; /* Hash Table */ 153 static u_long nchash; /* size of hash table */ 154 SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, 155 "Size of namecache hash table"); 156 static u_long ncnegfactor = 16; /* ratio of negative entries */ 157 SYSCTL_ULONG(_vfs, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, 158 "Ratio of negative namecache entries"); 159 static u_long numneg; /* number of negative entries allocated */ 160 SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, 161 "Number of negative entries in namecache"); 162 static u_long numcache; /* number of cache entries allocated */ 163 SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, 164 "Number of namecache entries"); 165 static u_long numcachehv; /* number of cache entries with vnodes held */ 166 SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, 167 "Number of namecache entries with vnodes held"); 168 static u_int ncsizefactor = 2; 169 SYSCTL_UINT(_vfs, OID_AUTO, ncsizefactor, CTLFLAG_RW, &ncsizefactor, 0, 170 "Size factor for namecache"); 171 172 struct nchstats nchstats; /* cache effectiveness statistics */ 173 174 static struct rwlock cache_lock; 175 RW_SYSINIT(vfscache, &cache_lock, "Name Cache"); 176 177 #define CACHE_UPGRADE_LOCK() rw_try_upgrade(&cache_lock) 178 #define CACHE_RLOCK() rw_rlock(&cache_lock) 179 #define CACHE_RUNLOCK() rw_runlock(&cache_lock) 180 #define CACHE_WLOCK() rw_wlock(&cache_lock) 181 #define CACHE_WUNLOCK() rw_wunlock(&cache_lock) 182 183 /* 184 * UMA zones for the VFS cache. 185 * 186 * The small cache is used for entries with short names, which are the 187 * most common. The large cache is used for entries which are too big to 188 * fit in the small cache. 189 */ 190 static uma_zone_t cache_zone_small; 191 static uma_zone_t cache_zone_small_ts; 192 static uma_zone_t cache_zone_large; 193 194 #define CACHE_PATH_CUTOFF 35 195 196 static struct namecache * 197 cache_alloc(int len, int ts) 198 { 199 200 if (len > CACHE_PATH_CUTOFF) 201 return (uma_zalloc(cache_zone_large, M_WAITOK)); 202 if (ts) 203 return (uma_zalloc(cache_zone_small_ts, M_WAITOK)); 204 else 205 return (uma_zalloc(cache_zone_small, M_WAITOK)); 206 } 207 208 static void 209 cache_free(struct namecache *ncp) 210 { 211 int ts; 212 213 if (ncp == NULL) 214 return; 215 ts = ncp->nc_flag & NCF_TS; 216 if (ncp->nc_nlen <= CACHE_PATH_CUTOFF) { 217 if (ts) 218 uma_zfree(cache_zone_small_ts, ncp); 219 else 220 uma_zfree(cache_zone_small, ncp); 221 } else 222 uma_zfree(cache_zone_large, ncp); 223 } 224 225 static char * 226 nc_get_name(struct namecache *ncp) 227 { 228 struct namecache_ts *ncp_ts; 229 230 if ((ncp->nc_flag & NCF_TS) == 0) 231 return (ncp->nc_name); 232 ncp_ts = (struct namecache_ts *)ncp; 233 return (ncp_ts->nc_name); 234 } 235 236 static void 237 cache_out_ts(struct namecache *ncp, struct timespec *tsp, int *ticksp) 238 { 239 240 KASSERT((ncp->nc_flag & NCF_TS) != 0 || 241 (tsp == NULL && ticksp == NULL), 242 ("No NCF_TS")); 243 244 if (tsp != NULL) 245 *tsp = ((struct namecache_ts *)ncp)->nc_time; 246 if (ticksp != NULL) 247 *ticksp = ((struct namecache_ts *)ncp)->nc_ticks; 248 } 249 250 static int doingcache = 1; /* 1 => enable the cache */ 251 SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, 252 "VFS namecache enabled"); 253 254 /* Export size information to userland */ 255 SYSCTL_INT(_debug_sizeof, OID_AUTO, namecache, CTLFLAG_RD, 0, 256 sizeof(struct namecache), "sizeof(struct namecache)"); 257 258 /* 259 * The new name cache statistics 260 */ 261 static SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, 262 "Name cache statistics"); 263 #define STATNODE(mode, name, var, descr) \ 264 SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, descr); 265 STATNODE(CTLFLAG_RD, numneg, &numneg, "Number of negative cache entries"); 266 STATNODE(CTLFLAG_RD, numcache, &numcache, "Number of cache entries"); 267 static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls, 268 "Number of cache lookups"); 269 static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits, 270 "Number of '.' hits"); 271 static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits, 272 "Number of '..' hits"); 273 static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks, 274 "Number of checks in lookup"); 275 static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss, 276 "Number of cache misses"); 277 static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap, 278 "Number of cache misses we do not want to cache"); 279 static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps, 280 "Number of cache hits (positive) we do not want to cache"); 281 static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits, 282 "Number of cache hits (positive)"); 283 static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps, 284 "Number of cache hits (negative) we do not want to cache"); 285 static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits, 286 "Number of cache hits (negative)"); 287 static u_long numupgrades; STATNODE(CTLFLAG_RD, numupgrades, &numupgrades, 288 "Number of updates of the cache after lookup (write lock + retry)"); 289 290 SYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD | CTLFLAG_MPSAFE, 291 &nchstats, sizeof(nchstats), "LU", 292 "VFS cache effectiveness statistics"); 293 294 295 296 static void cache_zap(struct namecache *ncp); 297 static int vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf, 298 u_int *buflen); 299 static int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir, 300 char *buf, char **retbuf, u_int buflen); 301 302 static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries"); 303 304 #ifdef DIAGNOSTIC 305 /* 306 * Grab an atomic snapshot of the name cache hash chain lengths 307 */ 308 static SYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, 309 "hash table stats"); 310 311 static int 312 sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS) 313 { 314 int error; 315 struct nchashhead *ncpp; 316 struct namecache *ncp; 317 int n_nchash; 318 int count; 319 320 n_nchash = nchash + 1; /* nchash is max index, not count */ 321 if (!req->oldptr) 322 return SYSCTL_OUT(req, 0, n_nchash * sizeof(int)); 323 324 /* Scan hash tables for applicable entries */ 325 for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { 326 CACHE_RLOCK(); 327 count = 0; 328 LIST_FOREACH(ncp, ncpp, nc_hash) { 329 count++; 330 } 331 CACHE_RUNLOCK(); 332 error = SYSCTL_OUT(req, &count, sizeof(count)); 333 if (error) 334 return (error); 335 } 336 return (0); 337 } 338 SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD| 339 CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_rawnchash, "S,int", 340 "nchash chain lengths"); 341 342 static int 343 sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS) 344 { 345 int error; 346 struct nchashhead *ncpp; 347 struct namecache *ncp; 348 int n_nchash; 349 int count, maxlength, used, pct; 350 351 if (!req->oldptr) 352 return SYSCTL_OUT(req, 0, 4 * sizeof(int)); 353 354 n_nchash = nchash + 1; /* nchash is max index, not count */ 355 used = 0; 356 maxlength = 0; 357 358 /* Scan hash tables for applicable entries */ 359 for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { 360 count = 0; 361 CACHE_RLOCK(); 362 LIST_FOREACH(ncp, ncpp, nc_hash) { 363 count++; 364 } 365 CACHE_RUNLOCK(); 366 if (count) 367 used++; 368 if (maxlength < count) 369 maxlength = count; 370 } 371 n_nchash = nchash + 1; 372 pct = (used * 100 * 100) / n_nchash; 373 error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash)); 374 if (error) 375 return (error); 376 error = SYSCTL_OUT(req, &used, sizeof(used)); 377 if (error) 378 return (error); 379 error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength)); 380 if (error) 381 return (error); 382 error = SYSCTL_OUT(req, &pct, sizeof(pct)); 383 if (error) 384 return (error); 385 return (0); 386 } 387 SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD| 388 CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_nchash, "I", 389 "nchash chain lengths"); 390 #endif 391 392 /* 393 * cache_zap(): 394 * 395 * Removes a namecache entry from cache, whether it contains an actual 396 * pointer to a vnode or if it is just a negative cache entry. 397 */ 398 static void 399 cache_zap(ncp) 400 struct namecache *ncp; 401 { 402 struct vnode *vp; 403 404 rw_assert(&cache_lock, RA_WLOCKED); 405 CTR2(KTR_VFS, "cache_zap(%p) vp %p", ncp, ncp->nc_vp); 406 #ifdef KDTRACE_HOOKS 407 if (ncp->nc_vp != NULL) { 408 SDT_PROBE(vfs, namecache, zap, done, ncp->nc_dvp, 409 nc_get_name(ncp), ncp->nc_vp, 0, 0); 410 } else { 411 SDT_PROBE(vfs, namecache, zap_negative, done, ncp->nc_dvp, 412 nc_get_name(ncp), 0, 0, 0); 413 } 414 #endif 415 vp = NULL; 416 LIST_REMOVE(ncp, nc_hash); 417 if (ncp->nc_flag & NCF_ISDOTDOT) { 418 if (ncp == ncp->nc_dvp->v_cache_dd) 419 ncp->nc_dvp->v_cache_dd = NULL; 420 } else { 421 LIST_REMOVE(ncp, nc_src); 422 if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) { 423 vp = ncp->nc_dvp; 424 numcachehv--; 425 } 426 } 427 if (ncp->nc_vp) { 428 TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst); 429 if (ncp == ncp->nc_vp->v_cache_dd) 430 ncp->nc_vp->v_cache_dd = NULL; 431 } else { 432 TAILQ_REMOVE(&ncneg, ncp, nc_dst); 433 numneg--; 434 } 435 numcache--; 436 cache_free(ncp); 437 if (vp) 438 vdrop(vp); 439 } 440 441 /* 442 * Lookup an entry in the cache 443 * 444 * Lookup is called with dvp pointing to the directory to search, 445 * cnp pointing to the name of the entry being sought. If the lookup 446 * succeeds, the vnode is returned in *vpp, and a status of -1 is 447 * returned. If the lookup determines that the name does not exist 448 * (negative cacheing), a status of ENOENT is returned. If the lookup 449 * fails, a status of zero is returned. If the directory vnode is 450 * recycled out from under us due to a forced unmount, a status of 451 * ENOENT is returned. 452 * 453 * vpp is locked and ref'd on return. If we're looking up DOTDOT, dvp is 454 * unlocked. If we're looking up . an extra ref is taken, but the lock is 455 * not recursively acquired. 456 */ 457 458 int 459 cache_lookup_times(dvp, vpp, cnp, tsp, ticksp) 460 struct vnode *dvp; 461 struct vnode **vpp; 462 struct componentname *cnp; 463 struct timespec *tsp; 464 int *ticksp; 465 { 466 struct namecache *ncp; 467 uint32_t hash; 468 int error, ltype, wlocked; 469 470 if (!doingcache) { 471 cnp->cn_flags &= ~MAKEENTRY; 472 return (0); 473 } 474 retry: 475 CACHE_RLOCK(); 476 wlocked = 0; 477 numcalls++; 478 error = 0; 479 480 retry_wlocked: 481 if (cnp->cn_nameptr[0] == '.') { 482 if (cnp->cn_namelen == 1) { 483 *vpp = dvp; 484 CTR2(KTR_VFS, "cache_lookup(%p, %s) found via .", 485 dvp, cnp->cn_nameptr); 486 dothits++; 487 SDT_PROBE(vfs, namecache, lookup, hit, dvp, ".", 488 *vpp, 0, 0); 489 if (tsp != NULL) 490 timespecclear(tsp); 491 if (ticksp != NULL) 492 *ticksp = ticks; 493 goto success; 494 } 495 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 496 dotdothits++; 497 if (dvp->v_cache_dd == NULL) { 498 SDT_PROBE(vfs, namecache, lookup, miss, dvp, 499 "..", NULL, 0, 0); 500 goto unlock; 501 } 502 if ((cnp->cn_flags & MAKEENTRY) == 0) { 503 if (!wlocked && !CACHE_UPGRADE_LOCK()) 504 goto wlock; 505 if (dvp->v_cache_dd->nc_flag & NCF_ISDOTDOT) 506 cache_zap(dvp->v_cache_dd); 507 dvp->v_cache_dd = NULL; 508 CACHE_WUNLOCK(); 509 return (0); 510 } 511 ncp = dvp->v_cache_dd; 512 if (ncp->nc_flag & NCF_ISDOTDOT) 513 *vpp = ncp->nc_vp; 514 else 515 *vpp = ncp->nc_dvp; 516 /* Return failure if negative entry was found. */ 517 if (*vpp == NULL) 518 goto negative_success; 519 CTR3(KTR_VFS, "cache_lookup(%p, %s) found %p via ..", 520 dvp, cnp->cn_nameptr, *vpp); 521 SDT_PROBE(vfs, namecache, lookup, hit, dvp, "..", 522 *vpp, 0, 0); 523 cache_out_ts(ncp, tsp, ticksp); 524 goto success; 525 } 526 } 527 528 hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT); 529 hash = fnv_32_buf(&dvp, sizeof(dvp), hash); 530 LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) { 531 numchecks++; 532 if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen && 533 !bcmp(nc_get_name(ncp), cnp->cn_nameptr, ncp->nc_nlen)) 534 break; 535 } 536 537 /* We failed to find an entry */ 538 if (ncp == NULL) { 539 SDT_PROBE(vfs, namecache, lookup, miss, dvp, cnp->cn_nameptr, 540 NULL, 0, 0); 541 if ((cnp->cn_flags & MAKEENTRY) == 0) { 542 nummisszap++; 543 } else { 544 nummiss++; 545 } 546 nchstats.ncs_miss++; 547 goto unlock; 548 } 549 550 /* We don't want to have an entry, so dump it */ 551 if ((cnp->cn_flags & MAKEENTRY) == 0) { 552 numposzaps++; 553 nchstats.ncs_badhits++; 554 if (!wlocked && !CACHE_UPGRADE_LOCK()) 555 goto wlock; 556 cache_zap(ncp); 557 CACHE_WUNLOCK(); 558 return (0); 559 } 560 561 /* We found a "positive" match, return the vnode */ 562 if (ncp->nc_vp) { 563 numposhits++; 564 nchstats.ncs_goodhits++; 565 *vpp = ncp->nc_vp; 566 CTR4(KTR_VFS, "cache_lookup(%p, %s) found %p via ncp %p", 567 dvp, cnp->cn_nameptr, *vpp, ncp); 568 SDT_PROBE(vfs, namecache, lookup, hit, dvp, nc_get_name(ncp), 569 *vpp, 0, 0); 570 cache_out_ts(ncp, tsp, ticksp); 571 goto success; 572 } 573 574 negative_success: 575 /* We found a negative match, and want to create it, so purge */ 576 if (cnp->cn_nameiop == CREATE) { 577 numnegzaps++; 578 nchstats.ncs_badhits++; 579 if (!wlocked && !CACHE_UPGRADE_LOCK()) 580 goto wlock; 581 cache_zap(ncp); 582 CACHE_WUNLOCK(); 583 return (0); 584 } 585 586 if (!wlocked && !CACHE_UPGRADE_LOCK()) 587 goto wlock; 588 numneghits++; 589 /* 590 * We found a "negative" match, so we shift it to the end of 591 * the "negative" cache entries queue to satisfy LRU. Also, 592 * check to see if the entry is a whiteout; indicate this to 593 * the componentname, if so. 594 */ 595 TAILQ_REMOVE(&ncneg, ncp, nc_dst); 596 TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 597 nchstats.ncs_neghits++; 598 if (ncp->nc_flag & NCF_WHITE) 599 cnp->cn_flags |= ISWHITEOUT; 600 SDT_PROBE(vfs, namecache, lookup, hit_negative, dvp, nc_get_name(ncp), 601 0, 0, 0); 602 cache_out_ts(ncp, tsp, ticksp); 603 CACHE_WUNLOCK(); 604 return (ENOENT); 605 606 wlock: 607 /* 608 * We need to update the cache after our lookup, so upgrade to 609 * a write lock and retry the operation. 610 */ 611 CACHE_RUNLOCK(); 612 CACHE_WLOCK(); 613 numupgrades++; 614 wlocked = 1; 615 goto retry_wlocked; 616 617 success: 618 /* 619 * On success we return a locked and ref'd vnode as per the lookup 620 * protocol. 621 */ 622 if (dvp == *vpp) { /* lookup on "." */ 623 VREF(*vpp); 624 if (wlocked) 625 CACHE_WUNLOCK(); 626 else 627 CACHE_RUNLOCK(); 628 /* 629 * When we lookup "." we still can be asked to lock it 630 * differently... 631 */ 632 ltype = cnp->cn_lkflags & LK_TYPE_MASK; 633 if (ltype != VOP_ISLOCKED(*vpp)) { 634 if (ltype == LK_EXCLUSIVE) { 635 vn_lock(*vpp, LK_UPGRADE | LK_RETRY); 636 if ((*vpp)->v_iflag & VI_DOOMED) { 637 /* forced unmount */ 638 vrele(*vpp); 639 *vpp = NULL; 640 return (ENOENT); 641 } 642 } else 643 vn_lock(*vpp, LK_DOWNGRADE | LK_RETRY); 644 } 645 return (-1); 646 } 647 ltype = 0; /* silence gcc warning */ 648 if (cnp->cn_flags & ISDOTDOT) { 649 ltype = VOP_ISLOCKED(dvp); 650 VOP_UNLOCK(dvp, 0); 651 } 652 VI_LOCK(*vpp); 653 if (wlocked) 654 CACHE_WUNLOCK(); 655 else 656 CACHE_RUNLOCK(); 657 error = vget(*vpp, cnp->cn_lkflags | LK_INTERLOCK, cnp->cn_thread); 658 if (cnp->cn_flags & ISDOTDOT) { 659 vn_lock(dvp, ltype | LK_RETRY); 660 if (dvp->v_iflag & VI_DOOMED) { 661 if (error == 0) 662 vput(*vpp); 663 *vpp = NULL; 664 return (ENOENT); 665 } 666 } 667 if (error) { 668 *vpp = NULL; 669 goto retry; 670 } 671 if ((cnp->cn_flags & ISLASTCN) && 672 (cnp->cn_lkflags & LK_TYPE_MASK) == LK_EXCLUSIVE) { 673 ASSERT_VOP_ELOCKED(*vpp, "cache_lookup"); 674 } 675 return (-1); 676 677 unlock: 678 if (wlocked) 679 CACHE_WUNLOCK(); 680 else 681 CACHE_RUNLOCK(); 682 return (0); 683 } 684 685 /* 686 * Add an entry to the cache. 687 */ 688 void 689 cache_enter_time(dvp, vp, cnp, tsp) 690 struct vnode *dvp; 691 struct vnode *vp; 692 struct componentname *cnp; 693 struct timespec *tsp; 694 { 695 struct namecache *ncp, *n2; 696 struct namecache_ts *n3; 697 struct nchashhead *ncpp; 698 uint32_t hash; 699 int flag; 700 int hold; 701 int zap; 702 int len; 703 704 CTR3(KTR_VFS, "cache_enter(%p, %p, %s)", dvp, vp, cnp->cn_nameptr); 705 VNASSERT(vp == NULL || (vp->v_iflag & VI_DOOMED) == 0, vp, 706 ("cache_enter: Adding a doomed vnode")); 707 VNASSERT(dvp == NULL || (dvp->v_iflag & VI_DOOMED) == 0, dvp, 708 ("cache_enter: Doomed vnode used as src")); 709 710 if (!doingcache) 711 return; 712 713 /* 714 * Avoid blowout in namecache entries. 715 */ 716 if (numcache >= desiredvnodes * ncsizefactor) 717 return; 718 719 flag = 0; 720 if (cnp->cn_nameptr[0] == '.') { 721 if (cnp->cn_namelen == 1) 722 return; 723 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 724 CACHE_WLOCK(); 725 /* 726 * If dotdot entry already exists, just retarget it 727 * to new parent vnode, otherwise continue with new 728 * namecache entry allocation. 729 */ 730 if ((ncp = dvp->v_cache_dd) != NULL && 731 ncp->nc_flag & NCF_ISDOTDOT) { 732 KASSERT(ncp->nc_dvp == dvp, 733 ("wrong isdotdot parent")); 734 if (ncp->nc_vp != NULL) 735 TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, 736 ncp, nc_dst); 737 else 738 TAILQ_REMOVE(&ncneg, ncp, nc_dst); 739 if (vp != NULL) 740 TAILQ_INSERT_HEAD(&vp->v_cache_dst, 741 ncp, nc_dst); 742 else 743 TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 744 ncp->nc_vp = vp; 745 CACHE_WUNLOCK(); 746 return; 747 } 748 dvp->v_cache_dd = NULL; 749 SDT_PROBE(vfs, namecache, enter, done, dvp, "..", vp, 750 0, 0); 751 CACHE_WUNLOCK(); 752 flag = NCF_ISDOTDOT; 753 } 754 } 755 756 hold = 0; 757 zap = 0; 758 759 /* 760 * Calculate the hash key and setup as much of the new 761 * namecache entry as possible before acquiring the lock. 762 */ 763 ncp = cache_alloc(cnp->cn_namelen, tsp != NULL); 764 ncp->nc_vp = vp; 765 ncp->nc_dvp = dvp; 766 ncp->nc_flag = flag; 767 if (tsp != NULL) { 768 n3 = (struct namecache_ts *)ncp; 769 n3->nc_time = *tsp; 770 n3->nc_ticks = ticks; 771 n3->nc_flag |= NCF_TS; 772 } 773 len = ncp->nc_nlen = cnp->cn_namelen; 774 hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT); 775 strlcpy(nc_get_name(ncp), cnp->cn_nameptr, len + 1); 776 hash = fnv_32_buf(&dvp, sizeof(dvp), hash); 777 CACHE_WLOCK(); 778 779 /* 780 * See if this vnode or negative entry is already in the cache 781 * with this name. This can happen with concurrent lookups of 782 * the same path name. 783 */ 784 ncpp = NCHHASH(hash); 785 LIST_FOREACH(n2, ncpp, nc_hash) { 786 if (n2->nc_dvp == dvp && 787 n2->nc_nlen == cnp->cn_namelen && 788 !bcmp(nc_get_name(n2), cnp->cn_nameptr, n2->nc_nlen)) { 789 if (tsp != NULL) { 790 KASSERT((n2->nc_flag & NCF_TS) != 0, 791 ("no NCF_TS")); 792 n3 = (struct namecache_ts *)n2; 793 n3->nc_time = 794 ((struct namecache_ts *)ncp)->nc_time; 795 n3->nc_ticks = 796 ((struct namecache_ts *)ncp)->nc_ticks; 797 } 798 CACHE_WUNLOCK(); 799 cache_free(ncp); 800 return; 801 } 802 } 803 804 if (flag == NCF_ISDOTDOT) { 805 /* 806 * See if we are trying to add .. entry, but some other lookup 807 * has populated v_cache_dd pointer already. 808 */ 809 if (dvp->v_cache_dd != NULL) { 810 CACHE_WUNLOCK(); 811 cache_free(ncp); 812 return; 813 } 814 KASSERT(vp == NULL || vp->v_type == VDIR, 815 ("wrong vnode type %p", vp)); 816 dvp->v_cache_dd = ncp; 817 } 818 819 numcache++; 820 if (!vp) { 821 numneg++; 822 if (cnp->cn_flags & ISWHITEOUT) 823 ncp->nc_flag |= NCF_WHITE; 824 } else if (vp->v_type == VDIR) { 825 if (flag != NCF_ISDOTDOT) { 826 if ((n2 = vp->v_cache_dd) != NULL && 827 (n2->nc_flag & NCF_ISDOTDOT) != 0) 828 cache_zap(n2); 829 vp->v_cache_dd = ncp; 830 } 831 } else { 832 vp->v_cache_dd = NULL; 833 } 834 835 /* 836 * Insert the new namecache entry into the appropriate chain 837 * within the cache entries table. 838 */ 839 LIST_INSERT_HEAD(ncpp, ncp, nc_hash); 840 if (flag != NCF_ISDOTDOT) { 841 if (LIST_EMPTY(&dvp->v_cache_src)) { 842 hold = 1; 843 numcachehv++; 844 } 845 LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src); 846 } 847 848 /* 849 * If the entry is "negative", we place it into the 850 * "negative" cache queue, otherwise, we place it into the 851 * destination vnode's cache entries queue. 852 */ 853 if (vp) { 854 TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst); 855 SDT_PROBE(vfs, namecache, enter, done, dvp, nc_get_name(ncp), 856 vp, 0, 0); 857 } else { 858 TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); 859 SDT_PROBE(vfs, namecache, enter_negative, done, dvp, 860 nc_get_name(ncp), 0, 0, 0); 861 } 862 if (numneg * ncnegfactor > numcache) { 863 ncp = TAILQ_FIRST(&ncneg); 864 zap = 1; 865 } 866 if (hold) 867 vhold(dvp); 868 if (zap) 869 cache_zap(ncp); 870 CACHE_WUNLOCK(); 871 } 872 873 /* 874 * Name cache initialization, from vfs_init() when we are booting 875 */ 876 static void 877 nchinit(void *dummy __unused) 878 { 879 880 TAILQ_INIT(&ncneg); 881 882 cache_zone_small = uma_zcreate("S VFS Cache", 883 sizeof(struct namecache) + CACHE_PATH_CUTOFF + 1, 884 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT); 885 cache_zone_small_ts = uma_zcreate("STS VFS Cache", 886 sizeof(struct namecache_ts) + CACHE_PATH_CUTOFF + 1, 887 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT); 888 cache_zone_large = uma_zcreate("L VFS Cache", 889 sizeof(struct namecache_ts) + NAME_MAX + 1, 890 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT); 891 892 nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash); 893 } 894 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL); 895 896 897 /* 898 * Invalidate all entries to a particular vnode. 899 */ 900 void 901 cache_purge(vp) 902 struct vnode *vp; 903 { 904 905 CTR1(KTR_VFS, "cache_purge(%p)", vp); 906 SDT_PROBE(vfs, namecache, purge, done, vp, 0, 0, 0, 0); 907 CACHE_WLOCK(); 908 while (!LIST_EMPTY(&vp->v_cache_src)) 909 cache_zap(LIST_FIRST(&vp->v_cache_src)); 910 while (!TAILQ_EMPTY(&vp->v_cache_dst)) 911 cache_zap(TAILQ_FIRST(&vp->v_cache_dst)); 912 if (vp->v_cache_dd != NULL) { 913 KASSERT(vp->v_cache_dd->nc_flag & NCF_ISDOTDOT, 914 ("lost dotdot link")); 915 cache_zap(vp->v_cache_dd); 916 } 917 KASSERT(vp->v_cache_dd == NULL, ("incomplete purge")); 918 CACHE_WUNLOCK(); 919 } 920 921 /* 922 * Invalidate all negative entries for a particular directory vnode. 923 */ 924 void 925 cache_purge_negative(vp) 926 struct vnode *vp; 927 { 928 struct namecache *cp, *ncp; 929 930 CTR1(KTR_VFS, "cache_purge_negative(%p)", vp); 931 SDT_PROBE(vfs, namecache, purge_negative, done, vp, 0, 0, 0, 0); 932 CACHE_WLOCK(); 933 LIST_FOREACH_SAFE(cp, &vp->v_cache_src, nc_src, ncp) { 934 if (cp->nc_vp == NULL) 935 cache_zap(cp); 936 } 937 CACHE_WUNLOCK(); 938 } 939 940 /* 941 * Flush all entries referencing a particular filesystem. 942 */ 943 void 944 cache_purgevfs(mp) 945 struct mount *mp; 946 { 947 struct nchashhead *ncpp; 948 struct namecache *ncp, *nnp; 949 950 /* Scan hash tables for applicable entries */ 951 SDT_PROBE(vfs, namecache, purgevfs, done, mp, 0, 0, 0, 0); 952 CACHE_WLOCK(); 953 for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) { 954 LIST_FOREACH_SAFE(ncp, ncpp, nc_hash, nnp) { 955 if (ncp->nc_dvp->v_mount == mp) 956 cache_zap(ncp); 957 } 958 } 959 CACHE_WUNLOCK(); 960 } 961 962 /* 963 * Perform canonical checks and cache lookup and pass on to filesystem 964 * through the vop_cachedlookup only if needed. 965 */ 966 967 int 968 vfs_cache_lookup(ap) 969 struct vop_lookup_args /* { 970 struct vnode *a_dvp; 971 struct vnode **a_vpp; 972 struct componentname *a_cnp; 973 } */ *ap; 974 { 975 struct vnode *dvp; 976 int error; 977 struct vnode **vpp = ap->a_vpp; 978 struct componentname *cnp = ap->a_cnp; 979 struct ucred *cred = cnp->cn_cred; 980 int flags = cnp->cn_flags; 981 struct thread *td = cnp->cn_thread; 982 983 *vpp = NULL; 984 dvp = ap->a_dvp; 985 986 if (dvp->v_type != VDIR) 987 return (ENOTDIR); 988 989 if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && 990 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 991 return (EROFS); 992 993 error = VOP_ACCESS(dvp, VEXEC, cred, td); 994 if (error) 995 return (error); 996 997 error = cache_lookup(dvp, vpp, cnp); 998 if (error == 0) 999 return (VOP_CACHEDLOOKUP(dvp, vpp, cnp)); 1000 if (error == -1) 1001 return (0); 1002 return (error); 1003 } 1004 1005 1006 #ifndef _SYS_SYSPROTO_H_ 1007 struct __getcwd_args { 1008 u_char *buf; 1009 u_int buflen; 1010 }; 1011 #endif 1012 1013 /* 1014 * XXX All of these sysctls would probably be more productive dead. 1015 */ 1016 static int disablecwd; 1017 SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, 1018 "Disable the getcwd syscall"); 1019 1020 /* Implementation of the getcwd syscall. */ 1021 int 1022 sys___getcwd(td, uap) 1023 struct thread *td; 1024 struct __getcwd_args *uap; 1025 { 1026 1027 return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen)); 1028 } 1029 1030 int 1031 kern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg, u_int buflen) 1032 { 1033 char *bp, *tmpbuf; 1034 struct filedesc *fdp; 1035 struct vnode *cdir, *rdir; 1036 int error, vfslocked; 1037 1038 if (disablecwd) 1039 return (ENODEV); 1040 if (buflen < 2) 1041 return (EINVAL); 1042 if (buflen > MAXPATHLEN) 1043 buflen = MAXPATHLEN; 1044 1045 tmpbuf = malloc(buflen, M_TEMP, M_WAITOK); 1046 fdp = td->td_proc->p_fd; 1047 FILEDESC_SLOCK(fdp); 1048 cdir = fdp->fd_cdir; 1049 VREF(cdir); 1050 rdir = fdp->fd_rdir; 1051 VREF(rdir); 1052 FILEDESC_SUNLOCK(fdp); 1053 error = vn_fullpath1(td, cdir, rdir, tmpbuf, &bp, buflen); 1054 vfslocked = VFS_LOCK_GIANT(rdir->v_mount); 1055 vrele(rdir); 1056 VFS_UNLOCK_GIANT(vfslocked); 1057 vfslocked = VFS_LOCK_GIANT(cdir->v_mount); 1058 vrele(cdir); 1059 VFS_UNLOCK_GIANT(vfslocked); 1060 1061 if (!error) { 1062 if (bufseg == UIO_SYSSPACE) 1063 bcopy(bp, buf, strlen(bp) + 1); 1064 else 1065 error = copyout(bp, buf, strlen(bp) + 1); 1066 #ifdef KTRACE 1067 if (KTRPOINT(curthread, KTR_NAMEI)) 1068 ktrnamei(bp); 1069 #endif 1070 } 1071 free(tmpbuf, M_TEMP); 1072 return (error); 1073 } 1074 1075 /* 1076 * Thus begins the fullpath magic. 1077 */ 1078 1079 #undef STATNODE 1080 #define STATNODE(name, descr) \ 1081 static u_int name; \ 1082 SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, descr) 1083 1084 static int disablefullpath; 1085 SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0, 1086 "Disable the vn_fullpath function"); 1087 1088 /* These count for kern___getcwd(), too. */ 1089 STATNODE(numfullpathcalls, "Number of fullpath search calls"); 1090 STATNODE(numfullpathfail1, "Number of fullpath search errors (ENOTDIR)"); 1091 STATNODE(numfullpathfail2, 1092 "Number of fullpath search errors (VOP_VPTOCNP failures)"); 1093 STATNODE(numfullpathfail4, "Number of fullpath search errors (ENOMEM)"); 1094 STATNODE(numfullpathfound, "Number of successful fullpath calls"); 1095 1096 /* 1097 * Retrieve the full filesystem path that correspond to a vnode from the name 1098 * cache (if available) 1099 */ 1100 int 1101 vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) 1102 { 1103 char *buf; 1104 struct filedesc *fdp; 1105 struct vnode *rdir; 1106 int error, vfslocked; 1107 1108 if (disablefullpath) 1109 return (ENODEV); 1110 if (vn == NULL) 1111 return (EINVAL); 1112 1113 buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 1114 fdp = td->td_proc->p_fd; 1115 FILEDESC_SLOCK(fdp); 1116 rdir = fdp->fd_rdir; 1117 VREF(rdir); 1118 FILEDESC_SUNLOCK(fdp); 1119 error = vn_fullpath1(td, vn, rdir, buf, retbuf, MAXPATHLEN); 1120 vfslocked = VFS_LOCK_GIANT(rdir->v_mount); 1121 vrele(rdir); 1122 VFS_UNLOCK_GIANT(vfslocked); 1123 1124 if (!error) 1125 *freebuf = buf; 1126 else 1127 free(buf, M_TEMP); 1128 return (error); 1129 } 1130 1131 /* 1132 * This function is similar to vn_fullpath, but it attempts to lookup the 1133 * pathname relative to the global root mount point. This is required for the 1134 * auditing sub-system, as audited pathnames must be absolute, relative to the 1135 * global root mount point. 1136 */ 1137 int 1138 vn_fullpath_global(struct thread *td, struct vnode *vn, 1139 char **retbuf, char **freebuf) 1140 { 1141 char *buf; 1142 int error; 1143 1144 if (disablefullpath) 1145 return (ENODEV); 1146 if (vn == NULL) 1147 return (EINVAL); 1148 buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 1149 error = vn_fullpath1(td, vn, rootvnode, buf, retbuf, MAXPATHLEN); 1150 if (!error) 1151 *freebuf = buf; 1152 else 1153 free(buf, M_TEMP); 1154 return (error); 1155 } 1156 1157 int 1158 vn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf, u_int *buflen) 1159 { 1160 int error; 1161 1162 CACHE_RLOCK(); 1163 error = vn_vptocnp_locked(vp, cred, buf, buflen); 1164 if (error == 0) 1165 CACHE_RUNLOCK(); 1166 return (error); 1167 } 1168 1169 static int 1170 vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf, 1171 u_int *buflen) 1172 { 1173 struct vnode *dvp; 1174 struct namecache *ncp; 1175 int error, vfslocked; 1176 1177 TAILQ_FOREACH(ncp, &((*vp)->v_cache_dst), nc_dst) { 1178 if ((ncp->nc_flag & NCF_ISDOTDOT) == 0) 1179 break; 1180 } 1181 if (ncp != NULL) { 1182 if (*buflen < ncp->nc_nlen) { 1183 CACHE_RUNLOCK(); 1184 vfslocked = VFS_LOCK_GIANT((*vp)->v_mount); 1185 vrele(*vp); 1186 VFS_UNLOCK_GIANT(vfslocked); 1187 numfullpathfail4++; 1188 error = ENOMEM; 1189 SDT_PROBE(vfs, namecache, fullpath, return, error, 1190 vp, NULL, 0, 0); 1191 return (error); 1192 } 1193 *buflen -= ncp->nc_nlen; 1194 memcpy(buf + *buflen, nc_get_name(ncp), ncp->nc_nlen); 1195 SDT_PROBE(vfs, namecache, fullpath, hit, ncp->nc_dvp, 1196 nc_get_name(ncp), vp, 0, 0); 1197 dvp = *vp; 1198 *vp = ncp->nc_dvp; 1199 vref(*vp); 1200 CACHE_RUNLOCK(); 1201 vfslocked = VFS_LOCK_GIANT(dvp->v_mount); 1202 vrele(dvp); 1203 VFS_UNLOCK_GIANT(vfslocked); 1204 CACHE_RLOCK(); 1205 return (0); 1206 } 1207 SDT_PROBE(vfs, namecache, fullpath, miss, vp, 0, 0, 0, 0); 1208 1209 CACHE_RUNLOCK(); 1210 vfslocked = VFS_LOCK_GIANT((*vp)->v_mount); 1211 vn_lock(*vp, LK_SHARED | LK_RETRY); 1212 error = VOP_VPTOCNP(*vp, &dvp, cred, buf, buflen); 1213 vput(*vp); 1214 VFS_UNLOCK_GIANT(vfslocked); 1215 if (error) { 1216 numfullpathfail2++; 1217 SDT_PROBE(vfs, namecache, fullpath, return, error, vp, 1218 NULL, 0, 0); 1219 return (error); 1220 } 1221 1222 *vp = dvp; 1223 CACHE_RLOCK(); 1224 if (dvp->v_iflag & VI_DOOMED) { 1225 /* forced unmount */ 1226 CACHE_RUNLOCK(); 1227 vfslocked = VFS_LOCK_GIANT(dvp->v_mount); 1228 vrele(dvp); 1229 VFS_UNLOCK_GIANT(vfslocked); 1230 error = ENOENT; 1231 SDT_PROBE(vfs, namecache, fullpath, return, error, vp, 1232 NULL, 0, 0); 1233 return (error); 1234 } 1235 /* 1236 * *vp has its use count incremented still. 1237 */ 1238 1239 return (0); 1240 } 1241 1242 /* 1243 * The magic behind kern___getcwd() and vn_fullpath(). 1244 */ 1245 static int 1246 vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir, 1247 char *buf, char **retbuf, u_int buflen) 1248 { 1249 int error, slash_prefixed, vfslocked; 1250 #ifdef KDTRACE_HOOKS 1251 struct vnode *startvp = vp; 1252 #endif 1253 struct vnode *vp1; 1254 1255 buflen--; 1256 buf[buflen] = '\0'; 1257 error = 0; 1258 slash_prefixed = 0; 1259 1260 SDT_PROBE(vfs, namecache, fullpath, entry, vp, 0, 0, 0, 0); 1261 numfullpathcalls++; 1262 vref(vp); 1263 CACHE_RLOCK(); 1264 if (vp->v_type != VDIR) { 1265 error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen); 1266 if (error) 1267 return (error); 1268 if (buflen == 0) { 1269 CACHE_RUNLOCK(); 1270 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1271 vrele(vp); 1272 VFS_UNLOCK_GIANT(vfslocked); 1273 return (ENOMEM); 1274 } 1275 buf[--buflen] = '/'; 1276 slash_prefixed = 1; 1277 } 1278 while (vp != rdir && vp != rootvnode) { 1279 if (vp->v_vflag & VV_ROOT) { 1280 if (vp->v_iflag & VI_DOOMED) { /* forced unmount */ 1281 CACHE_RUNLOCK(); 1282 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1283 vrele(vp); 1284 VFS_UNLOCK_GIANT(vfslocked); 1285 error = ENOENT; 1286 SDT_PROBE(vfs, namecache, fullpath, return, 1287 error, vp, NULL, 0, 0); 1288 break; 1289 } 1290 vp1 = vp->v_mount->mnt_vnodecovered; 1291 vref(vp1); 1292 CACHE_RUNLOCK(); 1293 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1294 vrele(vp); 1295 VFS_UNLOCK_GIANT(vfslocked); 1296 vp = vp1; 1297 CACHE_RLOCK(); 1298 continue; 1299 } 1300 if (vp->v_type != VDIR) { 1301 CACHE_RUNLOCK(); 1302 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1303 vrele(vp); 1304 VFS_UNLOCK_GIANT(vfslocked); 1305 numfullpathfail1++; 1306 error = ENOTDIR; 1307 SDT_PROBE(vfs, namecache, fullpath, return, 1308 error, vp, NULL, 0, 0); 1309 break; 1310 } 1311 error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen); 1312 if (error) 1313 break; 1314 if (buflen == 0) { 1315 CACHE_RUNLOCK(); 1316 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1317 vrele(vp); 1318 VFS_UNLOCK_GIANT(vfslocked); 1319 error = ENOMEM; 1320 SDT_PROBE(vfs, namecache, fullpath, return, error, 1321 startvp, NULL, 0, 0); 1322 break; 1323 } 1324 buf[--buflen] = '/'; 1325 slash_prefixed = 1; 1326 } 1327 if (error) 1328 return (error); 1329 if (!slash_prefixed) { 1330 if (buflen == 0) { 1331 CACHE_RUNLOCK(); 1332 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1333 vrele(vp); 1334 VFS_UNLOCK_GIANT(vfslocked); 1335 numfullpathfail4++; 1336 SDT_PROBE(vfs, namecache, fullpath, return, ENOMEM, 1337 startvp, NULL, 0, 0); 1338 return (ENOMEM); 1339 } 1340 buf[--buflen] = '/'; 1341 } 1342 numfullpathfound++; 1343 CACHE_RUNLOCK(); 1344 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1345 vrele(vp); 1346 VFS_UNLOCK_GIANT(vfslocked); 1347 1348 SDT_PROBE(vfs, namecache, fullpath, return, 0, startvp, buf + buflen, 1349 0, 0); 1350 *retbuf = buf + buflen; 1351 return (0); 1352 } 1353 1354 int 1355 vn_commname(struct vnode *vp, char *buf, u_int buflen) 1356 { 1357 struct namecache *ncp; 1358 int l; 1359 1360 CACHE_RLOCK(); 1361 TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_dst) 1362 if ((ncp->nc_flag & NCF_ISDOTDOT) == 0) 1363 break; 1364 if (ncp == NULL) { 1365 CACHE_RUNLOCK(); 1366 return (ENOENT); 1367 } 1368 l = min(ncp->nc_nlen, buflen - 1); 1369 memcpy(buf, nc_get_name(ncp), l); 1370 CACHE_RUNLOCK(); 1371 buf[l] = '\0'; 1372 return (0); 1373 } 1374 1375 /* ABI compat shims for old kernel modules. */ 1376 #undef cache_enter 1377 #undef cache_lookup 1378 1379 void cache_enter(struct vnode *dvp, struct vnode *vp, 1380 struct componentname *cnp); 1381 int cache_lookup(struct vnode *dvp, struct vnode **vpp, 1382 struct componentname *cnp); 1383 1384 void 1385 cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) 1386 { 1387 1388 cache_enter_time(dvp, vp, cnp, NULL); 1389 } 1390 1391 int 1392 cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) 1393 { 1394 1395 return (cache_lookup_times(dvp, vpp, cnp, NULL, NULL)); 1396 } 1397 1398 /* 1399 * This function updates path string to vnode's full global path 1400 * and checks the size of the new path string against the pathlen argument. 1401 * 1402 * Requires a locked, referenced vnode and GIANT lock held. 1403 * Vnode is re-locked on success or ENODEV, otherwise unlocked. 1404 * 1405 * If sysctl debug.disablefullpath is set, ENODEV is returned, 1406 * vnode is left locked and path remain untouched. 1407 * 1408 * If vp is a directory, the call to vn_fullpath_global() always succeeds 1409 * because it falls back to the ".." lookup if the namecache lookup fails. 1410 */ 1411 int 1412 vn_path_to_global_path(struct thread *td, struct vnode *vp, char *path, 1413 u_int pathlen) 1414 { 1415 struct nameidata nd; 1416 struct vnode *vp1; 1417 char *rpath, *fbuf; 1418 int error, vfslocked; 1419 1420 VFS_ASSERT_GIANT(vp->v_mount); 1421 ASSERT_VOP_ELOCKED(vp, __func__); 1422 1423 /* Return ENODEV if sysctl debug.disablefullpath==1 */ 1424 if (disablefullpath) 1425 return (ENODEV); 1426 1427 /* Construct global filesystem path from vp. */ 1428 VOP_UNLOCK(vp, 0); 1429 error = vn_fullpath_global(td, vp, &rpath, &fbuf); 1430 1431 if (error != 0) { 1432 vrele(vp); 1433 return (error); 1434 } 1435 1436 if (strlen(rpath) >= pathlen) { 1437 vrele(vp); 1438 error = ENAMETOOLONG; 1439 goto out; 1440 } 1441 1442 /* 1443 * Re-lookup the vnode by path to detect a possible rename. 1444 * As a side effect, the vnode is relocked. 1445 * If vnode was renamed, return ENOENT. 1446 */ 1447 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1, 1448 UIO_SYSSPACE, path, td); 1449 error = namei(&nd); 1450 if (error != 0) { 1451 vrele(vp); 1452 goto out; 1453 } 1454 vfslocked = NDHASGIANT(&nd); 1455 NDFREE(&nd, NDF_ONLY_PNBUF); 1456 vp1 = nd.ni_vp; 1457 vrele(vp); 1458 if (vp1 == vp) 1459 strcpy(path, rpath); 1460 else { 1461 vput(vp1); 1462 error = ENOENT; 1463 } 1464 VFS_UNLOCK_GIANT(vfslocked); 1465 1466 out: 1467 free(fbuf, M_TEMP); 1468 return (error); 1469 } 1470