xref: /illumos-gate/usr/src/stand/lib/fs/common/cache.c (revision e4c795beb33bf59dd4ad2e3f88f493111484b890)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  *  Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  *  Use is subject to license terms.
25  *
26  *  This is mostly new code.  Major revisions were made to allow multiple
27  *  file systems to share a common cache.  While this consisted primarily
28  *  of including a "devid_t" pointer in the hash functions, I also re-
29  *  organized everything to eliminate much of the duplicated code that
30  *  had existed previously.
31  */
32 
33 #pragma ident	"%Z%%M%	%I%	%E% SMI"
34 
35 #include <sys/param.h>
36 #include <sys/vnode.h>
37 #include <sys/sysmacros.h>
38 #include <sys/filep.h>
39 #include <sys/salib.h>
40 #include <sys/promif.h>
41 
42 #ifndef	ICACHE_SIZE
43 /*
44  *  These should probably be defined in an architecture-specific header
45  *  file.  The values below are analogous to those used in earlier versions
46  *  of this module.
47  */
48 
49 #define	ICACHE_SIZE 350	    /* Max number of I-node in file cache	*/
50 #define	DCACHE_SIZE 1500    /* Max number of cached directories		*/
51 #define	BCACHE_SIZE 250	    /* Max number of cached disk blocks		*/
52 #endif
53 
54 #define	Next 0		    /* Next pointer in Fwd/Bak link		*/
55 #define	Prev 1		    /* Previous pointer in Fwd/Back links	*/
56 
57 #define	Frst 0		    /* Ptr to first element of a chain		*/
58 #define	Last 1		    /* Ptr to last element of a chain		*/
59 
60 #define	Hash 2		    /* Offset of hash chain ptrs.		*/
61 
62 typedef struct cache {	    /* Generic cache element:			*/
63     struct cache *link[4];  /* .. Fwd/Bak links for hash chain & LRU	*/
64     struct cache **chn;	    /* .. Hash chain link			*/
65     int		   dev;	    /* .. Device file handle			*/
66     void	 *data;	    /* .. Ptr to associated data		*/
67     int		  size;	    /* .. Size of cached data			*/
68 } cache_t;
69 
70 typedef struct head {	    /* Generic cache header:			*/
71 	cache_t	   *aged[2];	/* .. LRU list				*/
72 	int (*cmp)(cache_t *);	/* .. Ptr to comparison function	*/
73 	int	  size;		/* .. Size of "cache" objects		*/
74 	int	  maxblks;	/* .. Max number of cached elements	*/
75 	int	  count;	/* .. Current number of cached elements	*/
76 	int	  hits;		/* .. Total cache hits			*/
77 	int	  searches;	/* .. Total searches			*/
78 	int	  purges;	/* .. Total purges			*/
79 } head_t;
80 
81 /* Constructor for cache headers:					*/
82 #define	cache_head(h, f, t, n) \
83 	{{(cache_t *)&h, (cache_t *)&h}, f, sizeof (t), n}
84 
85 int read_opt;		/* Number of times cache was bypassed	*/
86 static int x_dev;	/* Target device ID saved here!		*/
87 static int x_len;	/* length of object			*/
88 
89 #define	LOG2(x) \
90 	(((x) <= 16)  ?	 4 : /* Yeah, it's ugly.  But it works! */ \
91 	(((x) <= 32)  ?  5 : /* .. Binary log should be part of */ \
92 	(((x) <= 64)  ?  6 : /* .. the language!		*/ \
93 	(((x) <= 128) ?	 7 : 8))))
94 
95 static cache_t *
96 get_cache(cache_t *cap, head_t *chp)
97 {
98 	/*
99 	 *  Search cache:
100 	 *
101 	 *  The caller pass a pointer to the first "cache" object in the current
102 	 *  hash chain ["cap"] and a pointer to the corresponding cache header
103 	 *  ["chp"].  This routine follows the cache chain until it finds an
104 	 *  entry that matches both the current device [as noted in "x_dev"]
105 	 *  and the cache-specific comparison ["chp->cmp"].
106 	 *
107 	 *  Returns the address of the matching cache object or null if there
108 	 *  is none.
109 	 */
110 
111 	while (cap) {
112 		/*
113 		 * Check all entries on the cache chain.  We expect
114 		 * chains to be relatively short, so we use a simple
115 		 * linear search.
116 		 */
117 		if ((x_dev == cap->dev) && (*chp->cmp)(cap)) {
118 			/*
119 			 * Found the entry we're looking for! Move it
120 			 * to the front of the cache header's LRU list
121 			 * before returing its addres to the caller.
122 			 */
123 			cap->link[Next]->link[Prev] = cap->link[Prev];
124 			cap->link[Prev]->link[Next] = cap->link[Next];
125 
126 			cap->link[Prev] = (cache_t *)chp->aged;
127 			cap->link[Next] = chp->aged[Frst];
128 			chp->aged[Frst]->link[Prev] = cap;
129 			chp->aged[Frst] = cap;
130 			chp->hits += 1;
131 			break;
132 		}
133 
134 		cap = cap->link[Hash+Next];
135 	}
136 
137 	chp->searches += 1;
138 	return (cap);
139 }
140 
141 static cache_t *
142 reclaim_cache(head_t *chp, int dev)
143 {
144 	/*
145 	 * Reclaim a cache element:
146 	 *
147 	 * This routine is used to: [a] free the oldest element from
148 	 * the cache headed at "chp" and return the address of the
149 	 * corresponding "cache_t" struct (iff dev == -1), or [b] free all
150 	 * elements on the cache headed at "chp" that belong to the
151 	 * indicated "dev"ice.
152 	 */
153 	cache_t *cap, *cxp;
154 	cache_t *cpp = (cache_t *)chp;
155 
156 	while ((cap = cpp->link[Prev]) != (cache_t *)chp) {
157 		/*
158 		 * We follow the cache's LRU chain from oldest to
159 		 * newest member.  This ensures that we remove only
160 		 * the oldest element when we're called with a
161 		 * negative "dev" argument.
162 		 */
163 		if ((dev == -1) || (dev == cap->dev)) {
164 			/*
165 			 * This is one of the (perhaps the only)
166 			 * elements we're supposed to free.  Remove it
167 			 * from both the LRU list and its associated
168 			 * hash chain.  Then free the data bound the
169 			 * the cache_t element and, if "dev" is
170 			 * not -1, the element itself!
171 			 */
172 			cap->link[Prev]->link[Next] = cap->link[Next];
173 			cap->link[Next]->link[Prev] = cap->link[Prev];
174 
175 			if ((cxp = cap->link[Hash+Prev]) != 0)
176 				cxp->link[Hash+Next] = cap->link[Hash+Next];
177 			else
178 				*(cap->chn) = cap->link[Hash+Next];
179 
180 			if ((cxp = cap->link[Hash+Next]) != 0)
181 				cxp->link[Hash+Prev] = cap->link[Hash+Prev];
182 
183 			bkmem_free((caddr_t)cap->data, cap->size);
184 			if (dev == -1)
185 				return (cap);
186 
187 			bkmem_free((caddr_t)cap, chp->size);
188 			chp->count -= 1;
189 
190 		} else {
191 			/*
192 			 * Skip this element, it's not one of the
193 			 * ones we want to free up.
194 			 */
195 			cpp = cap;
196 		}
197 	};
198 
199 	return (0);
200 }
201 
202 static cache_t *
203 set_cache(cache_t **ccp, head_t *chp, int noreclaim)
204 {
205 	/*
206 	 *  Install a cache element:
207 	 *
208 	 *  The caller passes the address of cache descriptor ["chp"] and the
209 	 *  hash chain into which the new element is to be linked ["ccp"].  This
210 	 *  routine allocates a new cache_t structure (or, if the maximum number
211 	 *  of elements has already been allocated, reclaims the oldest element
212 	 *  from the cache), links it into the indicated hash chain, and returns
213 	 *  its address to the caller.
214 	 */
215 	cache_t *cap;
216 
217 	if ((chp->count < chp->maxblks) &&
218 	    (cap = (cache_t *)bkmem_alloc(chp->size))) {
219 		/*
220 		 * We haven't reached the maximum cache size yet.
221 		 * Allocate a new "cache_t" struct to be added to the
222 		 * cache.
223 		 */
224 		chp->count += 1;
225 
226 	} else {
227 		if (noreclaim)
228 			return (NULL);
229 
230 		/*
231 		 * Cache is full.  Use the "reclaim_cache" routine to
232 		 * remove the oldest element from the cache.  This
233 		 * will become the cache_t struct associated with the
234 		 * new element.
235 		 */
236 		cap = reclaim_cache(chp, -1);
237 		chp->purges += 1;
238 	}
239 
240 	bzero((char *)cap, chp->size);
241 
242 	cap->chn = ccp;
243 	cap->link[Prev] = (cache_t *)chp;
244 	cap->link[Next] = chp->aged[Frst];
245 	cap->link[Prev]->link[Next] = cap->link[Next]->link[Prev] = cap;
246 
247 	if ((cap->link[Hash+Next] = *ccp) != 0)
248 		(*ccp)->link[Hash+Prev] = cap;
249 	return (*ccp = cap);
250 }
251 
252 /*
253  *  The File Cache:
254  *
255  *  This cache (also known as the inode cache) is used to keep track of all
256  *  files open on a given device.  The only special data required to locate
257  *  a cache entry is the file reference number which is file-system dependent
258  *  (for UNIX file systems, it's an inode number).
259  */
260 
261 typedef struct icache {		/* Inode cache element:		*/
262 	cache_t ic_hdr;		/* .. Standard header		*/
263 	int	ic_num;		/* .. I-node number		*/
264 } ic_t;
265 
266 #define	IC_MAX_HDRS (1 << LOG2(ICACHE_SIZE/6))
267 #define	IC_HASH(d, i) (((d) + (i)) & (IC_MAX_HDRS - 1))
268 
269 static int x_inode;
270 
271 static int		    /* Cache search predicate:			    */
272 cmp_icache(cache_t *p)
273 {
274 	/* Just check the file number ("x_inode") ...	*/
275 	return (((ic_t *)p)->ic_num == x_inode);
276 }
277 
278 static head_t	ic_head = cache_head(ic_head, cmp_icache, ic_t, ICACHE_SIZE);
279 static cache_t *ic_hash[IC_MAX_HDRS];
280 
281 void *
282 get_icache(int dev, int inum)
283 {
284 	/*
285 	 *  Search File Cache:
286 	 *
287 	 *  This routine searches the file cache looking for the entry bound to
288 	 *  the given "dev"ice and file number ["inum"].  If said entry exists,
289 	 *  it returns the address of the associated file structure.  Otherwise
290 	 *  it returns null.
291 	 */
292 	cache_t *icp;
293 
294 	x_dev = dev;
295 	x_inode = inum;
296 	icp = get_cache(ic_hash[IC_HASH(dev, inum)], &ic_head);
297 
298 	return (icp ? (caddr_t)icp->data : 0);
299 }
300 
301 void
302 set_icache(int dev, int inum, void *ip, int size)
303 {
304 	/*
305 	 *  Build a File Cache Entry:
306 	 *
307 	 * This routne installs the "size"-byte file structure at
308 	 * "*ip" in the inode cache where it may be retrieved by
309 	 * subsequent call to get_icache.
310 	 */
311 	ic_t *icp = (ic_t *)set_cache(&ic_hash[IC_HASH(dev, inum)],
312 								&ic_head, 0);
313 	icp->ic_num = inum;
314 	icp->ic_hdr.data = ip;
315 	icp->ic_hdr.dev = dev;
316 	icp->ic_hdr.size = size;
317 }
318 
319 int
320 set_ricache(int dev, int inum, void *ip, int size)
321 {
322 	/*
323 	 * Reliably set the icache
324 	 *
325 	 * This routine is the same as set_icache except that it
326 	 * will return 1 if the entry could not be entered into the cache
327 	 * without a purge.
328 	 */
329 	ic_t *icp = (ic_t *)set_cache(&ic_hash[IC_HASH(dev, inum)],
330 					&ic_head, 1);
331 
332 	if (icp == NULL)
333 		return (1);
334 
335 	icp->ic_num = inum;
336 	icp->ic_hdr.data = ip;
337 	icp->ic_hdr.dev = dev;
338 	icp->ic_hdr.size = size;
339 
340 	return (0);
341 }
342 
343 /*
344  *  The Directory Cache:
345  *
346  *  This cache is designed to speed directory searches.	 Each entry cor-
347  *  responds to a directory entry that was used in a pathname resolution.
348  *  The idea is that most files used by the boot wil be contained in a hand-
349  *  full of directories, so we can speed searches if we know ahead of time
350  *  just where these directories are.
351  */
352 
353 typedef struct dcache {		/* Directory cache objects:	*/
354 	cache_t dc_hdr;		/* .. Standard header		*/
355 	int	dc_inum;	/* .. File number		*/
356 	int	dc_pnum;	/* .. Parent diretory's file number */
357 } dc_t;
358 
359 #define	DC_MAX_HDRS (1 << LOG2(DCACHE_SIZE/6))
360 #define	DC_HASH(d, n, l) (((d) + (n)[0] + (n)[(l)-1] + (l)) & (DC_MAX_HDRS-1))
361 
362 static char *x_name;
363 static int x_pnum;
364 
365 static int
366 cmp_dcache(cache_t *p) /* Cache Search predicate:	*/
367 {
368 	/* Check name, length, and parent's file number	*/
369 	return ((x_len == p->size) && (x_pnum == ((dc_t *)p)->dc_pnum) &&
370 	    (strcmp((char *)p->data, x_name) == 0));
371 }
372 
373 static head_t	dc_head = cache_head(dc_head, cmp_dcache, dc_t, DCACHE_SIZE);
374 static cache_t *dc_hash[DC_MAX_HDRS];
375 
376 int
377 get_dcache(int dev, char *name, int pnum)
378 {
379 	/*
380 	 *  Search Directory Cache:
381 	 *
382 	 *  This routine searches the directory cache for an entry
383 	 *  associated with directory number "pnum" from the given
384 	 *  file system that de-scribes a file of the given "name".
385 	 *  If we find such an entry, we return the corresponding file
386 	 *  number, 0 otherwise.
387 	 */
388 	dc_t *dcp;
389 
390 	x_dev = dev;
391 	x_len = strlen(name)+1;
392 	x_pnum = pnum;
393 	x_name = name;
394 	dcp = (dc_t *)get_cache(dc_hash[DC_HASH(dev, name, x_len)], &dc_head);
395 
396 	return (dcp ? dcp->dc_inum : 0);
397 }
398 
399 void
400 set_dcache(int dev, char *name, int pnum, int inum)
401 {
402 	/*
403 	 *  Build Directory Cache Entry:
404 	 *
405 	 *  This routine creates directory cache entries to be retrieved later
406 	 *  via "get_dcache".  The cache key is composed of three parts: The
407 	 *  device specifier, the file name ("name"), and the file number of
408 	 *  the directory containing that name ("pnum").  The data portion of
409 	 *  the entry consists of the file number ("inum").
410 	 */
411 
412 	int len = strlen(name)+1;
413 	dc_t *dcp =
414 	    (dc_t *)set_cache(&dc_hash[DC_HASH(dev, name, len)], &dc_head, 0);
415 
416 	if (dcp->dc_hdr.data = (void *)bkmem_alloc(len)) {
417 		/*
418 		 * Allocate a buffer for the pathname component, and
419 		 * make this the "data" portion of the generalize
420 		 * "cache_t" struct. Also fill in the cache-specific
421 		 * fields (pnum, inum).
422 		 */
423 		dcp->dc_pnum = pnum;
424 		dcp->dc_inum = inum;
425 		dcp->dc_hdr.dev = dev;
426 		dcp->dc_hdr.size = len;
427 		bcopy(name, (char *)dcp->dc_hdr.data, len);
428 
429 	} else {
430 		/*
431 		 * Not enough memory to make a copy of the name!
432 		 * There's probably not enough to do much else either!
433 		 */
434 		prom_panic("no memory for directory cache");
435 	}
436 }
437 
438 int
439 set_rdcache(int dev, char *name, int pnum, int inum)
440 {
441 	/*
442 	 * Reliably set the dcache
443 	 *
444 	 * This routine is the same as set_dcache except that it
445 	 * return 1 if the entry could not be entered into
446 	 * the cache without a purge.
447 	 */
448 	int len = strlen(name) + 1;
449 	dc_t *dcp =
450 		(dc_t *)set_cache(&dc_hash[DC_HASH(dev, name, len)],
451 								&dc_head, 1);
452 
453 	if (dcp == NULL)
454 		return (1);
455 
456 	if ((dcp->dc_hdr.data = (void *)bkmem_alloc(len)) == NULL) {
457 		/*
458 		 * Not enough memory to make a copy of the name!
459 		 * There's probably not enough to do much else either!
460 		 */
461 		prom_panic("no memory for directory cache");
462 		/* NOTREACHED */
463 	}
464 
465 	/*
466 	 * Allocate a buffer for the pathname component, and
467 	 * make this the "data" portion of the generalize
468 	 * "cache_t" struct. Also fill in the cache-specific
469 	 * fields (pnum, inum).
470 	 */
471 	dcp->dc_pnum = pnum;
472 	dcp->dc_inum = inum;
473 	dcp->dc_hdr.dev = dev;
474 	dcp->dc_hdr.size = len;
475 	bcopy(name, (char *)dcp->dc_hdr.data, len);
476 
477 	return (0);
478 }
479 
480 /*
481  *  Disk Block Cache:
482  */
483 
484 typedef struct bcache {	    /* Disk block cache objects:		*/
485 	cache_t		bc_hdr;	/* .. Standard header			*/
486 	unsigned long	bc_blk;	/* .. The block number			*/
487 } bc_t;
488 
489 #define	BC_MAX_HDRS (1 << LOG2(BCACHE_SIZE/6))
490 #define	BC_HASH(d, b, l) (((d) + (b) + ((l) >> 8)) & (BC_MAX_HDRS-1))
491 
492 static unsigned long x_blkno;
493 
494 static int
495 cmp_bcache(cache_t *p) /* Cache Search predicate:		*/
496 {
497 	/* Check block number, buffer size	*/
498 	return ((x_len == p->size) && (x_blkno == ((bc_t *)p)->bc_blk));
499 }
500 
501 static head_t	bc_head = cache_head(bc_head, cmp_bcache, bc_t, BCACHE_SIZE);
502 static cache_t *bc_hash[BC_MAX_HDRS];
503 
504 caddr_t
505 get_bcache(fileid_t *fp)
506 {
507 	/*
508 	 *  Search Disk Block Cache:
509 	 *
510 	 *  This should be getting pretty monotonous by now.  Aren't generalized
511 	 *  subroutines ("objects", if you prefer) great?
512 	 */
513 	cache_t *bcp;
514 
515 	x_len = fp->fi_count;
516 	x_blkno = fp->fi_blocknum;
517 	x_dev = fp->fi_devp->di_dcookie;
518 	bcp = get_cache(bc_hash[BC_HASH(x_dev, x_blkno, x_len)], &bc_head);
519 
520 	return (bcp ? (caddr_t)bcp->data : 0);
521 }
522 
523 int
524 set_bcache(fileid_t *fp)
525 {
526 	/*
527 	 *  Insert Disk Block Cache Entry:
528 	 *
529 	 *  In this case, we actually read the requested block into a
530 	 *  dynamically allocated buffer before inserting it into the
531 	 *  cache.  If the read fails, we return a non-zero value.
532 	 *
533 	 *  The search keys for disk blocks are the block number and
534 	 *  buffer size.  The data associated with each entry is the
535 	 *  corresponding data buffer.
536 	 */
537 	bc_t *bcp;
538 
539 	if (fp->fi_memp = bkmem_alloc(x_len = fp->fi_count)) {
540 		/*
541 		 *  We were able to succesffully allocate an input
542 		 *  buffer, now read the data into it.
543 		 */
544 		if (diskread(fp) != 0) {
545 			/*
546 			 * I/O error on read. Free the input buffer,
547 			 * print an error message, and bail out.
548 			 */
549 			bkmem_free(fp->fi_memp, x_len);
550 			printf("disk read error\n");
551 			return (-1);
552 		}
553 
554 		x_blkno = fp->fi_blocknum;
555 		x_dev = fp->fi_devp->di_dcookie;
556 		bcp = (bc_t *)
557 			set_cache(&bc_hash[BC_HASH(x_dev, x_blkno, x_len)],
558 								&bc_head, 0);
559 		bcp->bc_blk = x_blkno;
560 		bcp->bc_hdr.dev = x_dev;
561 		bcp->bc_hdr.size = x_len;
562 		bcp->bc_hdr.data = (void *)fp->fi_memp;
563 
564 	} else {
565 		/*
566 		 * We could be a bit more convervative here by
567 		 * calling "set_cache" before we try to allocate a
568 		 * buffer (thereby giving us a chance to re-use a
569 		 * previously allocated buffer) but the error recovery
570 		 * is a bit trickier, and if we're that short on memory
571 		 * we'll have trouble elsewhere anyway!
572 		 */
573 		prom_panic("can't read - no memory");
574 	}
575 
576 	return (0);
577 }
578 
579 void
580 release_cache(int dev)
581 {
582 	/*
583 	 *  Reclaim all cache entries:
584 	 *
585 	 *  This routine is called by the file-system's "closeall" method.  It
586 	 *  removes all cache entries associated with that file system from the
587 	 *  global cache and release any resources bound to said entrires.
588 	 */
589 
590 	(void) reclaim_cache(&ic_head, dev);
591 	(void) reclaim_cache(&dc_head, dev);
592 	(void) reclaim_cache(&bc_head, dev);
593 }
594 
595 void
596 print_cache_data()
597 {
598 	/*
599 	 *  Print some cacheing statistics ...
600 	 */
601 	static char	*tag[] = { "inode", "directory", "disk block", 0};
602 	static head_t	*hdp[] = { &ic_head, &dc_head, &bc_head, 0};
603 
604 	int j;
605 
606 	for (j = 0; tag[j]; j++) {
607 		/*
608 		 * Print statistics maintained in the header
609 		 * ("head_t" struct) of each of the above caches.
610 		 */
611 		head_t *hp = hdp[j];
612 
613 		if (j)
614 			printf("\n");
615 		printf("%s cache:\n", tag[j]);
616 		printf("   max size %d\n", hp->maxblks);
617 		printf("   actual size %d\n", hp->count);
618 		printf("   total searches %d\n", hp->searches);
619 		printf("   cache hits %d\n", hp->hits);
620 		printf("   cache purges %d\n", hp->purges);
621 	}
622 
623 	printf("\nread opts %d\n", read_opt);
624 }
625