xref: /freebsd/sys/kern/vfs_cache.c (revision ce834215a70ff69e7e222827437116eee2f9ac6f)
1 /*
2  * Copyright (c) 1989, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Poul-Henning Kamp of the FreeBSD Project.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)vfs_cache.c	8.5 (Berkeley) 3/22/95
37  * $Id: vfs_cache.c,v 1.24 1997/03/08 15:22:14 bde Exp $
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/kernel.h>
43 #include <sys/sysctl.h>
44 #include <sys/time.h>
45 #include <sys/mount.h>
46 #include <sys/vnode.h>
47 #include <sys/namei.h>
48 #include <sys/errno.h>
49 #include <sys/malloc.h>
50 
#define MAXVNODEUSE 32	/* NOTE(review): not referenced in this file -- confirm external use or remove */
52 
53 /*
54  * Name caching works as follows:
55  *
56  * Names found by directory scans are retained in a cache
57  * for future reference.  It is managed LRU, so frequently
58  * used names will hang around.  Cache is indexed by hash value
59  * obtained from (vp, name) where vp refers to the directory
60  * containing name.
61  *
62  * If it is a "negative" entry, (i.e. for a name that is known NOT to
63  * exist) the vnode pointer will be NULL.
64  *
65  * Upon reaching the last segment of a path, if the reference
66  * is for DELETE, or NOCACHE is set (rewrite), and the
67  * name is located in the cache, it will be dropped.
68  */
69 
/*
 * Structures associated with name caching.
 *
 * Entries hash on (directory v_id, precomputed name hash).  Each entry
 * is additionally linked on its source directory's v_cache_src list and,
 * for positive entries, on the target vnode's v_cache_dst list; negative
 * entries go on the global ncneg LRU instead.
 */
#define NCHHASH(dvp, cnp) \
	(&nchashtbl[((dvp)->v_id + (cnp)->cn_hash) % nchash])
static LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
static TAILQ_HEAD(, namecache) ncneg;	/* LRU list of negative entries */
static u_long	nchash;			/* size of hash table */
static u_long	ncnegfactor = 16;	/* max negative entries: numcache/ncnegfactor */
/*
 * NOTE(review): SYSCTL_INT is applied to u_long objects below; the sizes
 * only match where int and long have the same width -- confirm.
 */
SYSCTL_INT(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");
static u_long	numneg;		/* number of negative entries allocated */
SYSCTL_INT(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");
static u_long	numcache;		/* number of cache entries allocated */
SYSCTL_INT(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");
struct	nchstats nchstats;		/* cache effectiveness statistics */

static int	doingcache = 1;		/* 1 => enable the cache */
SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "");
/* Structure sizes exported read-only for debugging/tuning. */
SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), "");
SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), "");

static void cache_zap __P((struct namecache *ncp));

/*
 * Flags in namecache.nc_flag
 */
#define NCF_WHITE	1	/* negative entry represents a whiteout */
97 /*
98  * Delete an entry from its hash list and move it to the front
99  * of the LRU list for immediate reuse.
100  */
101 static void
102 cache_zap(ncp)
103 	struct namecache *ncp;
104 {
105 	LIST_REMOVE(ncp, nc_hash);
106 	LIST_REMOVE(ncp, nc_src);
107 	if (ncp->nc_vp) {
108 		TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
109 	} else {
110 		TAILQ_REMOVE(&ncneg, ncp, nc_dst);
111 		numneg--;
112 	}
113 	numcache--;
114 	free(ncp, M_CACHE);
115 }
116 
/*
 * Lookup an entry in the cache
 *
 * We don't do this if the segment name is long, simply so the cache
 * can avoid holding long names (which would either waste space, or
 * add greatly to the complexity).
 *
 * Lookup is called with dvp pointing to the directory to search,
 * cnp pointing to the name of the entry being sought. If the lookup
 * succeeds, the vnode is returned in *vpp, and a status of -1 is
 * returned. If the lookup determines that the name does not exist
 * (negative caching), a status of ENOENT is returned. If the lookup
 * fails, a status of zero is returned.
 */
131 
132 int
133 cache_lookup(dvp, vpp, cnp)
134 	struct vnode *dvp;
135 	struct vnode **vpp;
136 	struct componentname *cnp;
137 {
138 	register struct namecache *ncp, *nnp;
139 	register struct nchashhead *ncpp;
140 
141 	if (!doingcache) {
142 		cnp->cn_flags &= ~MAKEENTRY;
143 		return (0);
144 	}
145 
146 	if (cnp->cn_nameptr[0] == '.') {
147 		if (cnp->cn_namelen == 1) {
148 			*vpp = dvp;
149 			return (-1);
150 		}
151 		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
152 			if (dvp->v_dd->v_id != dvp->v_ddid ||
153 			    (cnp->cn_flags & MAKEENTRY) == 0) {
154 				dvp->v_ddid = 0;
155 				return (0);
156 			}
157 			*vpp = dvp->v_dd;
158 			return (-1);
159 		}
160 	}
161 
162 	LIST_FOREACH(ncp, (NCHHASH(dvp, cnp)), nc_hash) {
163 		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
164 		    !bcmp(ncp->nc_name, cnp->cn_nameptr, (u_int)ncp->nc_nlen))
165 			break;
166 	}
167 
168 	/* We failed to find an entry */
169 	if (ncp == 0) {
170 		nchstats.ncs_miss++;
171 		return (0);
172 	}
173 
174 	/* We don't want to have an entry, so dump it */
175 	if ((cnp->cn_flags & MAKEENTRY) == 0) {
176 		nchstats.ncs_badhits++;
177 		cache_zap(ncp);
178 		return (0);
179 	}
180 
181 	/* We found a "positive" match, return the vnode */
182         if (ncp->nc_vp) {
183 		nchstats.ncs_goodhits++;
184 		vtouch(ncp->nc_vp);
185 		*vpp = ncp->nc_vp;
186 		return (-1);
187 	}
188 
189 	/* We found a negative match, and want to create it, so purge */
190 	if (cnp->cn_nameiop == CREATE) {
191 		nchstats.ncs_badhits++;
192 		cache_zap(ncp);
193 		return (0);
194 	}
195 
196 	/*
197 	 * We found a "negative" match, ENOENT notifies client of this match.
198 	 * The nc_vpid field records whether this is a whiteout.
199 	 */
200 	TAILQ_REMOVE(&ncneg, ncp, nc_dst);
201 	TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
202 	nchstats.ncs_neghits++;
203 	if (ncp->nc_flag & NCF_WHITE)
204 		cnp->cn_flags |= ISWHITEOUT;
205 	return (ENOENT);
206 }
207 
/*
 * Add an entry to the cache.
 *
 * dvp is the directory that was searched, cnp the component found there
 * and vp the resulting vnode (NULL for a negative entry).  "." is never
 * cached; ".." is cached on the directory vnode itself (v_dd/v_ddid)
 * rather than in the hash table.
 */
void
cache_enter(dvp, vp, cnp)
	struct vnode *dvp;
	struct vnode *vp;
	struct componentname *cnp;
{
	register struct namecache *ncp;
	register struct nchashhead *ncpp;

	if (!doingcache)
		return;

	if (cnp->cn_nameptr[0] == '.') {
		if (cnp->cn_namelen == 1) {
			return;
		}
		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
			/*
			 * Record the parent on the directory vnode.  A
			 * negative ".." points the vnode at itself with
			 * the never-valid id 0.
			 */
			if (vp) {
				dvp->v_dd = vp;
				dvp->v_ddid = vp->v_id;
			} else {
				dvp->v_dd = dvp;
				dvp->v_ddid = 0;
			}
			return;
		}
	}

	/* Entry and name are one allocation; the name trails the struct. */
	ncp = (struct namecache *)
		malloc(sizeof *ncp + cnp->cn_namelen, M_CACHE, M_WAITOK);
	bzero((char *)ncp, sizeof *ncp);
	numcache++;
	if (!vp)
		numneg++;

	/*
	 * Fill in cache info, if vp is NULL this is a "negative" cache entry.
	 * For negative entries, we have to record whether it is a whiteout.
	 * The whiteout flag is stored in the nc_flag field, which is
	 * otherwise left zero (by the bzero above) for positive entries.
	 */
	ncp->nc_vp = vp;
	if (vp)
		vtouch(vp);
	else
		ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0;
	ncp->nc_dvp = dvp;
	ncp->nc_nlen = cnp->cn_namelen;
	bcopy(cnp->cn_nameptr, ncp->nc_name, (unsigned)ncp->nc_nlen);
	/*
	 * Link into the hash chain, the source directory's list, and either
	 * the target vnode's destination list or the negative-entry LRU.
	 */
	ncpp = NCHHASH(dvp, cnp);
	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
	LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
	if (vp) {
		TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
	} else {
		TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
	}
	/*
	 * Cap negative entries at numcache/ncnegfactor by recycling the
	 * least recently used one (head of ncneg).
	 */
	if (numneg*ncnegfactor > numcache) {
		ncp = TAILQ_FIRST(&ncneg);
		cache_zap(ncp);
	}
}
273 
274 /*
275  * Name cache initialization, from vfs_init() when we are booting
276  */
277 void
278 nchinit()
279 {
280 
281 	TAILQ_INIT(&ncneg);
282 	nchashtbl = phashinit(desiredvnodes, M_CACHE, &nchash);
283 }
284 
285 /*
286  * Invalidate all entries to particular vnode.
287  *
288  * We actually just increment the v_id, that will do it. The stale entries
289  * will be purged by lookup as they get found. If the v_id wraps around, we
290  * need to ditch the entire cache, to avoid confusion. No valid vnode will
291  * ever have (v_id == 0).
292  */
293 void
294 cache_purge(vp)
295 	struct vnode *vp;
296 {
297 	struct namecache *ncp;
298 	struct nchashhead *ncpp;
299 	static u_long nextvnodeid;
300 
301 	while (!LIST_EMPTY(&vp->v_cache_src))
302 		cache_zap(LIST_FIRST(&vp->v_cache_src));
303 	while (!TAILQ_EMPTY(&vp->v_cache_dst))
304 		cache_zap(TAILQ_FIRST(&vp->v_cache_dst));
305 
306 	/* Never assign the same v_id, and never assign zero as v_id */
307 	do {
308 		if (++nextvnodeid == vp->v_id)
309 			++nextvnodeid;
310 	} while (!nextvnodeid);
311 
312 	vp->v_id = nextvnodeid;
313 	vp->v_dd = vp;
314 	vp->v_ddid = 0;
315 }
316 
317 /*
318  * Flush all entries referencing a particular filesystem.
319  *
320  * Since we need to check it anyway, we will flush all the invalid
321  * entries at the same time.
322  */
323 void
324 cache_purgevfs(mp)
325 	struct mount *mp;
326 {
327 	struct nchashhead *ncpp;
328 	struct namecache *ncp, *nnp;
329 
330 	/* Scan hash tables for applicable entries */
331 	for (ncpp = &nchashtbl[nchash - 1]; ncpp >= nchashtbl; ncpp--) {
332 		for (ncp = LIST_FIRST(ncpp); ncp != 0; ncp = nnp) {
333 			nnp = LIST_NEXT(ncp, nc_hash);
334 			if (ncp->nc_dvp->v_mount == mp) {
335 				cache_zap(ncp);
336 			}
337 		}
338 	}
339 }
340