1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * 26 * This is mostly new code. Major revisions were made to allow multiple 27 * file systems to share a common cache. While this consisted primarily 28 * of including a "devid_t" pointer in the hash functions, I also re- 29 * organized everything to eliminate much of the duplicated code that 30 * had existed previously. 31 */ 32 33 #pragma ident "%Z%%M% %I% %E% SMI" 34 35 #include <sys/param.h> 36 #include <sys/vnode.h> 37 #include <sys/sysmacros.h> 38 #include <sys/filep.h> 39 #include <sys/salib.h> 40 #include <sys/promif.h> 41 42 #ifndef ICACHE_SIZE 43 /* 44 * These should probably be defined in an architecture-specific header 45 * file. The values below are analogous to those used in earlier versions 46 * of this module. 47 */ 48 49 #define ICACHE_SIZE 350 /* Max number of I-node in file cache */ 50 #define DCACHE_SIZE 1500 /* Max number of cached directories */ 51 #define BCACHE_SIZE 250 /* Max number of cached disk blocks */ 52 #endif 53 54 #define Next 0 /* Next pointer in Fwd/Bak link */ 55 #define Prev 1 /* Previous pointer in Fwd/Back links */ 56 57 #define Frst 0 /* Ptr to first element of a chain */ 58 #define Last 1 /* Ptr to last element of a chain */ 59 60 #define Hash 2 /* Offset of hash chain ptrs. */ 61 62 typedef struct cache { /* Generic cache element: */ 63 struct cache *link[4]; /* .. Fwd/Bak links for hash chain & LRU */ 64 struct cache **chn; /* .. Hash chain link */ 65 int dev; /* .. Device file handle */ 66 void *data; /* .. Ptr to associated data */ 67 int size; /* .. Size of cached data */ 68 } cache_t; 69 70 typedef struct head { /* Generic cache header: */ 71 cache_t *aged[2]; /* .. LRU list */ 72 int (*cmp)(cache_t *); /* .. Ptr to comparison function */ 73 int size; /* .. Size of "cache" objects */ 74 int maxblks; /* .. Max number of cached elements */ 75 int count; /* .. Current number of cached elements */ 76 int hits; /* .. Total cache hits */ 77 int searches; /* .. Total searches */ 78 int purges; /* .. Total purges */ 79 } head_t; 80 81 /* Constructor for cache headers: */ 82 #define cache_head(h, f, t, n) \ 83 {{(cache_t *)&h, (cache_t *)&h}, f, sizeof (t), n} 84 85 int read_opt; /* Number of times cache was bypassed */ 86 static int x_dev; /* Target device ID saved here! */ 87 static int x_len; /* length of object */ 88 89 #define LOG2(x) \ 90 (((x) <= 16) ? 4 : /* Yeah, it's ugly. But it works! */ \ 91 (((x) <= 32) ? 5 : /* .. Binary log should be part of */ \ 92 (((x) <= 64) ? 6 : /* .. the language! */ \ 93 (((x) <= 128) ? 7 : 8)))) 94 95 static cache_t * 96 get_cache(cache_t *cap, head_t *chp) 97 { 98 /* 99 * Search cache: 100 * 101 * The caller pass a pointer to the first "cache" object in the current 102 * hash chain ["cap"] and a pointer to the corresponding cache header 103 * ["chp"]. This routine follows the cache chain until it finds an 104 * entry that matches both the current device [as noted in "x_dev"] 105 * and the cache-specific comparison ["chp->cmp"]. 106 * 107 * Returns the address of the matching cache object or null if there 108 * is none. 109 */ 110 111 while (cap) { 112 /* 113 * Check all entries on the cache chain. We expect 114 * chains to be relatively short, so we use a simple 115 * linear search. 116 */ 117 if ((x_dev == cap->dev) && (*chp->cmp)(cap)) { 118 /* 119 * Found the entry we're looking for! Move it 120 * to the front of the cache header's LRU list 121 * before returing its addres to the caller. 122 */ 123 cap->link[Next]->link[Prev] = cap->link[Prev]; 124 cap->link[Prev]->link[Next] = cap->link[Next]; 125 126 cap->link[Prev] = (cache_t *)chp->aged; 127 cap->link[Next] = chp->aged[Frst]; 128 chp->aged[Frst]->link[Prev] = cap; 129 chp->aged[Frst] = cap; 130 chp->hits += 1; 131 break; 132 } 133 134 cap = cap->link[Hash+Next]; 135 } 136 137 chp->searches += 1; 138 return (cap); 139 } 140 141 static cache_t * 142 reclaim_cache(head_t *chp, int dev) 143 { 144 /* 145 * Reclaim a cache element: 146 * 147 * This routine is used to: [a] free the oldest element from 148 * the cache headed at "chp" and return the address of the 149 * corresponding "cache_t" struct (iff dev == -1), or [b] free all 150 * elements on the cache headed at "chp" that belong to the 151 * indicated "dev"ice. 152 */ 153 cache_t *cap, *cxp; 154 cache_t *cpp = (cache_t *)chp; 155 156 while ((cap = cpp->link[Prev]) != (cache_t *)chp) { 157 /* 158 * We follow the cache's LRU chain from oldest to 159 * newest member. This ensures that we remove only 160 * the oldest element when we're called with a 161 * negative "dev" argument. 162 */ 163 if ((dev == -1) || (dev == cap->dev)) { 164 /* 165 * This is one of the (perhaps the only) 166 * elements we're supposed to free. Remove it 167 * from both the LRU list and its associated 168 * hash chain. Then free the data bound the 169 * the cache_t element and, if "dev" is 170 * not -1, the element itself! 171 */ 172 cap->link[Prev]->link[Next] = cap->link[Next]; 173 cap->link[Next]->link[Prev] = cap->link[Prev]; 174 175 if ((cxp = cap->link[Hash+Prev]) != 0) 176 cxp->link[Hash+Next] = cap->link[Hash+Next]; 177 else 178 *(cap->chn) = cap->link[Hash+Next]; 179 180 if ((cxp = cap->link[Hash+Next]) != 0) 181 cxp->link[Hash+Prev] = cap->link[Hash+Prev]; 182 183 bkmem_free((caddr_t)cap->data, cap->size); 184 if (dev == -1) 185 return (cap); 186 187 bkmem_free((caddr_t)cap, chp->size); 188 chp->count -= 1; 189 190 } else { 191 /* 192 * Skip this element, it's not one of the 193 * ones we want to free up. 194 */ 195 cpp = cap; 196 } 197 }; 198 199 return (0); 200 } 201 202 static cache_t * 203 set_cache(cache_t **ccp, head_t *chp, int noreclaim) 204 { 205 /* 206 * Install a cache element: 207 * 208 * The caller passes the address of cache descriptor ["chp"] and the 209 * hash chain into which the new element is to be linked ["ccp"]. This 210 * routine allocates a new cache_t structure (or, if the maximum number 211 * of elements has already been allocated, reclaims the oldest element 212 * from the cache), links it into the indicated hash chain, and returns 213 * its address to the caller. 214 */ 215 cache_t *cap; 216 217 if ((chp->count < chp->maxblks) && 218 (cap = (cache_t *)bkmem_alloc(chp->size))) { 219 /* 220 * We haven't reached the maximum cache size yet. 221 * Allocate a new "cache_t" struct to be added to the 222 * cache. 223 */ 224 chp->count += 1; 225 226 } else { 227 if (noreclaim) 228 return (NULL); 229 230 /* 231 * Cache is full. Use the "reclaim_cache" routine to 232 * remove the oldest element from the cache. This 233 * will become the cache_t struct associated with the 234 * new element. 235 */ 236 cap = reclaim_cache(chp, -1); 237 chp->purges += 1; 238 } 239 240 bzero((char *)cap, chp->size); 241 242 cap->chn = ccp; 243 cap->link[Prev] = (cache_t *)chp; 244 cap->link[Next] = chp->aged[Frst]; 245 cap->link[Prev]->link[Next] = cap->link[Next]->link[Prev] = cap; 246 247 if ((cap->link[Hash+Next] = *ccp) != 0) 248 (*ccp)->link[Hash+Prev] = cap; 249 return (*ccp = cap); 250 } 251 252 /* 253 * The File Cache: 254 * 255 * This cache (also known as the inode cache) is used to keep track of all 256 * files open on a given device. The only special data required to locate 257 * a cache entry is the file reference number which is file-system dependent 258 * (for UNIX file systems, it's an inode number). 259 */ 260 261 typedef struct icache { /* Inode cache element: */ 262 cache_t ic_hdr; /* .. Standard header */ 263 int ic_num; /* .. I-node number */ 264 } ic_t; 265 266 #define IC_MAX_HDRS (1 << LOG2(ICACHE_SIZE/6)) 267 #define IC_HASH(d, i) (((d) + (i)) & (IC_MAX_HDRS - 1)) 268 269 static int x_inode; 270 271 static int /* Cache search predicate: */ 272 cmp_icache(cache_t *p) 273 { 274 /* Just check the file number ("x_inode") ... */ 275 return (((ic_t *)p)->ic_num == x_inode); 276 } 277 278 static head_t ic_head = cache_head(ic_head, cmp_icache, ic_t, ICACHE_SIZE); 279 static cache_t *ic_hash[IC_MAX_HDRS]; 280 281 void * 282 get_icache(int dev, int inum) 283 { 284 /* 285 * Search File Cache: 286 * 287 * This routine searches the file cache looking for the entry bound to 288 * the given "dev"ice and file number ["inum"]. If said entry exists, 289 * it returns the address of the associated file structure. Otherwise 290 * it returns null. 291 */ 292 cache_t *icp; 293 294 x_dev = dev; 295 x_inode = inum; 296 icp = get_cache(ic_hash[IC_HASH(dev, inum)], &ic_head); 297 298 return (icp ? (caddr_t)icp->data : 0); 299 } 300 301 void 302 set_icache(int dev, int inum, void *ip, int size) 303 { 304 /* 305 * Build a File Cache Entry: 306 * 307 * This routne installs the "size"-byte file structure at 308 * "*ip" in the inode cache where it may be retrieved by 309 * subsequent call to get_icache. 310 */ 311 ic_t *icp = (ic_t *)set_cache(&ic_hash[IC_HASH(dev, inum)], 312 &ic_head, 0); 313 icp->ic_num = inum; 314 icp->ic_hdr.data = ip; 315 icp->ic_hdr.dev = dev; 316 icp->ic_hdr.size = size; 317 } 318 319 int 320 set_ricache(int dev, int inum, void *ip, int size) 321 { 322 /* 323 * Reliably set the icache 324 * 325 * This routine is the same as set_icache except that it 326 * will return 1 if the entry could not be entered into the cache 327 * without a purge. 328 */ 329 ic_t *icp = (ic_t *)set_cache(&ic_hash[IC_HASH(dev, inum)], 330 &ic_head, 1); 331 332 if (icp == NULL) 333 return (1); 334 335 icp->ic_num = inum; 336 icp->ic_hdr.data = ip; 337 icp->ic_hdr.dev = dev; 338 icp->ic_hdr.size = size; 339 340 return (0); 341 } 342 343 /* 344 * The Directory Cache: 345 * 346 * This cache is designed to speed directory searches. Each entry cor- 347 * responds to a directory entry that was used in a pathname resolution. 348 * The idea is that most files used by the boot wil be contained in a hand- 349 * full of directories, so we can speed searches if we know ahead of time 350 * just where these directories are. 351 */ 352 353 typedef struct dcache { /* Directory cache objects: */ 354 cache_t dc_hdr; /* .. Standard header */ 355 int dc_inum; /* .. File number */ 356 int dc_pnum; /* .. Parent diretory's file number */ 357 } dc_t; 358 359 #define DC_MAX_HDRS (1 << LOG2(DCACHE_SIZE/6)) 360 #define DC_HASH(d, n, l) (((d) + (n)[0] + (n)[(l)-1] + (l)) & (DC_MAX_HDRS-1)) 361 362 static char *x_name; 363 static int x_pnum; 364 365 static int 366 cmp_dcache(cache_t *p) /* Cache Search predicate: */ 367 { 368 /* Check name, length, and parent's file number */ 369 return ((x_len == p->size) && (x_pnum == ((dc_t *)p)->dc_pnum) && 370 (strcmp((char *)p->data, x_name) == 0)); 371 } 372 373 static head_t dc_head = cache_head(dc_head, cmp_dcache, dc_t, DCACHE_SIZE); 374 static cache_t *dc_hash[DC_MAX_HDRS]; 375 376 int 377 get_dcache(int dev, char *name, int pnum) 378 { 379 /* 380 * Search Directory Cache: 381 * 382 * This routine searches the directory cache for an entry 383 * associated with directory number "pnum" from the given 384 * file system that de-scribes a file of the given "name". 385 * If we find such an entry, we return the corresponding file 386 * number, 0 otherwise. 387 */ 388 dc_t *dcp; 389 390 x_dev = dev; 391 x_len = strlen(name)+1; 392 x_pnum = pnum; 393 x_name = name; 394 dcp = (dc_t *)get_cache(dc_hash[DC_HASH(dev, name, x_len)], &dc_head); 395 396 return (dcp ? dcp->dc_inum : 0); 397 } 398 399 void 400 set_dcache(int dev, char *name, int pnum, int inum) 401 { 402 /* 403 * Build Directory Cache Entry: 404 * 405 * This routine creates directory cache entries to be retrieved later 406 * via "get_dcache". The cache key is composed of three parts: The 407 * device specifier, the file name ("name"), and the file number of 408 * the directory containing that name ("pnum"). The data portion of 409 * the entry consists of the file number ("inum"). 410 */ 411 412 int len = strlen(name)+1; 413 dc_t *dcp = 414 (dc_t *)set_cache(&dc_hash[DC_HASH(dev, name, len)], &dc_head, 0); 415 416 if (dcp->dc_hdr.data = (void *)bkmem_alloc(len)) { 417 /* 418 * Allocate a buffer for the pathname component, and 419 * make this the "data" portion of the generalize 420 * "cache_t" struct. Also fill in the cache-specific 421 * fields (pnum, inum). 422 */ 423 dcp->dc_pnum = pnum; 424 dcp->dc_inum = inum; 425 dcp->dc_hdr.dev = dev; 426 dcp->dc_hdr.size = len; 427 bcopy(name, (char *)dcp->dc_hdr.data, len); 428 429 } else { 430 /* 431 * Not enough memory to make a copy of the name! 432 * There's probably not enough to do much else either! 433 */ 434 prom_panic("no memory for directory cache"); 435 } 436 } 437 438 int 439 set_rdcache(int dev, char *name, int pnum, int inum) 440 { 441 /* 442 * Reliably set the dcache 443 * 444 * This routine is the same as set_dcache except that it 445 * return 1 if the entry could not be entered into 446 * the cache without a purge. 447 */ 448 int len = strlen(name) + 1; 449 dc_t *dcp = 450 (dc_t *)set_cache(&dc_hash[DC_HASH(dev, name, len)], 451 &dc_head, 1); 452 453 if (dcp == NULL) 454 return (1); 455 456 if ((dcp->dc_hdr.data = (void *)bkmem_alloc(len)) == NULL) { 457 /* 458 * Not enough memory to make a copy of the name! 459 * There's probably not enough to do much else either! 460 */ 461 prom_panic("no memory for directory cache"); 462 /* NOTREACHED */ 463 } 464 465 /* 466 * Allocate a buffer for the pathname component, and 467 * make this the "data" portion of the generalize 468 * "cache_t" struct. Also fill in the cache-specific 469 * fields (pnum, inum). 470 */ 471 dcp->dc_pnum = pnum; 472 dcp->dc_inum = inum; 473 dcp->dc_hdr.dev = dev; 474 dcp->dc_hdr.size = len; 475 bcopy(name, (char *)dcp->dc_hdr.data, len); 476 477 return (0); 478 } 479 480 /* 481 * Disk Block Cache: 482 */ 483 484 typedef struct bcache { /* Disk block cache objects: */ 485 cache_t bc_hdr; /* .. Standard header */ 486 unsigned long bc_blk; /* .. The block number */ 487 } bc_t; 488 489 #define BC_MAX_HDRS (1 << LOG2(BCACHE_SIZE/6)) 490 #define BC_HASH(d, b, l) (((d) + (b) + ((l) >> 8)) & (BC_MAX_HDRS-1)) 491 492 static unsigned long x_blkno; 493 494 static int 495 cmp_bcache(cache_t *p) /* Cache Search predicate: */ 496 { 497 /* Check block number, buffer size */ 498 return ((x_len == p->size) && (x_blkno == ((bc_t *)p)->bc_blk)); 499 } 500 501 static head_t bc_head = cache_head(bc_head, cmp_bcache, bc_t, BCACHE_SIZE); 502 static cache_t *bc_hash[BC_MAX_HDRS]; 503 504 caddr_t 505 get_bcache(fileid_t *fp) 506 { 507 /* 508 * Search Disk Block Cache: 509 * 510 * This should be getting pretty monotonous by now. Aren't generalized 511 * subroutines ("objects", if you prefer) great? 512 */ 513 cache_t *bcp; 514 515 x_len = fp->fi_count; 516 x_blkno = fp->fi_blocknum; 517 x_dev = fp->fi_devp->di_dcookie; 518 bcp = get_cache(bc_hash[BC_HASH(x_dev, x_blkno, x_len)], &bc_head); 519 520 return (bcp ? (caddr_t)bcp->data : 0); 521 } 522 523 int 524 set_bcache(fileid_t *fp) 525 { 526 /* 527 * Insert Disk Block Cache Entry: 528 * 529 * In this case, we actually read the requested block into a 530 * dynamically allocated buffer before inserting it into the 531 * cache. If the read fails, we return a non-zero value. 532 * 533 * The search keys for disk blocks are the block number and 534 * buffer size. The data associated with each entry is the 535 * corresponding data buffer. 536 */ 537 bc_t *bcp; 538 539 if (fp->fi_memp = bkmem_alloc(x_len = fp->fi_count)) { 540 /* 541 * We were able to succesffully allocate an input 542 * buffer, now read the data into it. 543 */ 544 if (diskread(fp) != 0) { 545 /* 546 * I/O error on read. Free the input buffer, 547 * print an error message, and bail out. 548 */ 549 bkmem_free(fp->fi_memp, x_len); 550 printf("disk read error\n"); 551 return (-1); 552 } 553 554 x_blkno = fp->fi_blocknum; 555 x_dev = fp->fi_devp->di_dcookie; 556 bcp = (bc_t *) 557 set_cache(&bc_hash[BC_HASH(x_dev, x_blkno, x_len)], 558 &bc_head, 0); 559 bcp->bc_blk = x_blkno; 560 bcp->bc_hdr.dev = x_dev; 561 bcp->bc_hdr.size = x_len; 562 bcp->bc_hdr.data = (void *)fp->fi_memp; 563 564 } else { 565 /* 566 * We could be a bit more convervative here by 567 * calling "set_cache" before we try to allocate a 568 * buffer (thereby giving us a chance to re-use a 569 * previously allocated buffer) but the error recovery 570 * is a bit trickier, and if we're that short on memory 571 * we'll have trouble elsewhere anyway! 572 */ 573 prom_panic("can't read - no memory"); 574 } 575 576 return (0); 577 } 578 579 void 580 release_cache(int dev) 581 { 582 /* 583 * Reclaim all cache entries: 584 * 585 * This routine is called by the file-system's "closeall" method. It 586 * removes all cache entries associated with that file system from the 587 * global cache and release any resources bound to said entrires. 588 */ 589 590 (void) reclaim_cache(&ic_head, dev); 591 (void) reclaim_cache(&dc_head, dev); 592 (void) reclaim_cache(&bc_head, dev); 593 } 594 595 void 596 print_cache_data() 597 { 598 /* 599 * Print some cacheing statistics ... 600 */ 601 static char *tag[] = { "inode", "directory", "disk block", 0}; 602 static head_t *hdp[] = { &ic_head, &dc_head, &bc_head, 0}; 603 604 int j; 605 606 for (j = 0; tag[j]; j++) { 607 /* 608 * Print statistics maintained in the header 609 * ("head_t" struct) of each of the above caches. 610 */ 611 head_t *hp = hdp[j]; 612 613 if (j) 614 printf("\n"); 615 printf("%s cache:\n", tag[j]); 616 printf(" max size %d\n", hp->maxblks); 617 printf(" actual size %d\n", hp->count); 618 printf(" total searches %d\n", hp->searches); 619 printf(" cache hits %d\n", hp->hits); 620 printf(" cache purges %d\n", hp->purges); 621 } 622 623 printf("\nread opts %d\n", read_opt); 624 } 625