/*
 * linux/fs/mbcache.c
 * (C) 2001-2002 Andreas Gruenbacher, <a.gruenbacher@computer.org>
 */

/*
 * Filesystem Meta Information Block Cache (mbcache)
 *
 * The mbcache caches blocks of block devices that need to be located
 * by their device/block number, as well as by other criteria (such
 * as the block's contents).
 *
 * There can only be one cache entry in a cache per device and block number.
 * Additional indexes need not be unique in this sense. The number of
 * additional indexes (=other criteria) can be hardwired at compile time
 * or specified at cache create time.
 *
 * Each cache entry is of fixed size. An entry may be `valid' or `invalid'
 * in the cache. A valid entry is in the main hash tables of the cache,
 * and may also be in the lru list. An invalid entry is not in any hashes
 * or lists.
 *
 * A valid cache entry is only in the lru list if no handles refer to it.
 * Invalid cache entries will be freed when the last handle to the cache
 * entry is released. Entries that cannot be freed immediately are put
 * back on the lru list.
 */

/*
 * Lock descriptions and usage:
 *
 * Each hash chain of both the block and index hash tables now contains
 * a built-in lock used to serialize accesses to the hash chain.
 *
 * Accesses to global data structures mb_cache_list and mb_cache_lru_list
 * are serialized via the global spinlock mb_cache_spinlock.
 *
 * Each mb_cache_entry contains a spinlock, e_entry_lock, to serialize
 * accesses to its local data, such as e_used and e_queued.
 *
 * Lock ordering:
 *
 * Each block hash chain's lock has the highest lock order, followed by an
 * index hash chain's lock, mb_cache_bg_lock (used to implement
 * mb_cache_entry's lock), and mb_cache_spinlock, with the lowest order.
 * While holding either a block or index hash chain lock, a thread can
 * acquire mb_cache_bg_lock, which in turn can also acquire
 * mb_cache_spinlock.
 *
 * Synchronization:
 *
 * Since both mb_cache_entry_get and mb_cache_entry_find scan the block and
 * index hash chains, they need to lock the corresponding hash chain. For
 * each mb_cache_entry within the chain, they also need to lock the
 * mb_cache_entry itself, both to prevent any simultaneous release or free
 * of the entry and to serialize accesses to its e_used and e_queued
 * members.
 *
 * To avoid having a dangling reference to an already freed
 * mb_cache_entry, an mb_cache_entry is only freed when it is not on a
 * block hash chain and is no longer being referenced, i.e. both e_used
 * and e_queued are 0. When an mb_cache_entry is explicitly freed it is
 * first removed from a block hash chain.
 */
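/*
 * Illustrative sketch only (not code that runs anywhere in this file): the
 * lock ordering described above corresponds to the following legal nesting,
 * where ce is an arbitrary cache entry and the __spin_*_mb_cache_entry
 * helpers are defined later in this file. An outer lock in this list may be
 * held while taking any lock below it; the reverse is never allowed.
 *
 *	hlist_bl_lock(ce->e_block_hash_p);	(highest order)
 *	hlist_bl_lock(ce->e_index_hash_p);
 *	__spin_lock_mb_cache_entry(ce);		(mb_cache_bg_lock)
 *	spin_lock(&mb_cache_spinlock);		(lowest order)
 *	spin_unlock(&mb_cache_spinlock);
 *	__spin_unlock_mb_cache_entry(ce);
 *	hlist_bl_unlock(ce->e_index_hash_p);
 *	hlist_bl_unlock(ce->e_block_hash_p);
 */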
#include <linux/kernel.h>
#include <linux/module.h>

#include <linux/hash.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/list_bl.h>
#include <linux/mbcache.h>
#include <linux/init.h>
#include <linux/blockgroup_lock.h>
#include <linux/log2.h>

#ifdef MB_CACHE_DEBUG
# define mb_debug(f...) do { \
		printk(KERN_DEBUG f); \
		printk("\n"); \
	} while (0)
#define mb_assert(c) do { if (!(c)) \
		printk(KERN_ERR "assertion " #c " failed\n"); \
	} while(0)
#else
# define mb_debug(f...) do { } while(0)
# define mb_assert(c) do { } while(0)
#endif
#define mb_error(f...) do { \
		printk(KERN_ERR f); \
		printk("\n"); \
	} while(0)

#define MB_CACHE_WRITER ((unsigned short)~0U >> 1)
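/*
 * Note on how e_used is encoded with MB_CACHE_WRITER (descriptive only,
 * summarizing the code below): e_used counts handles to an entry. A shared
 * handle from mb_cache_entry_find_first/_next increments e_used by 1; an
 * exclusive handle from mb_cache_entry_alloc or mb_cache_entry_get adds
 * 1 + MB_CACHE_WRITER. Testing e_used >= MB_CACHE_WRITER therefore tells
 * whether a writer currently holds the entry, and __mb_cache_entry_release
 * undoes whichever of the two was added.
 */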
#define MB_CACHE_ENTRY_LOCK_BITS	ilog2(NR_BG_LOCKS)
#define	MB_CACHE_ENTRY_LOCK_INDEX(ce)			\
	(hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS))

static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue);
static struct blockgroup_lock *mb_cache_bg_lock;
static struct kmem_cache *mb_cache_kmem_cache;

MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>");
MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
MODULE_LICENSE("GPL");

EXPORT_SYMBOL(mb_cache_create);
EXPORT_SYMBOL(mb_cache_shrink);
EXPORT_SYMBOL(mb_cache_destroy);
EXPORT_SYMBOL(mb_cache_entry_alloc);
EXPORT_SYMBOL(mb_cache_entry_insert);
EXPORT_SYMBOL(mb_cache_entry_release);
EXPORT_SYMBOL(mb_cache_entry_free);
EXPORT_SYMBOL(mb_cache_entry_get);
#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
EXPORT_SYMBOL(mb_cache_entry_find_first);
EXPORT_SYMBOL(mb_cache_entry_find_next);
#endif

/*
 * Global data: list of all mbcache's, lru list, and a spinlock for
 * accessing cache data structures on SMP machines. The lru list is
 * global across all mbcaches.
 */

static LIST_HEAD(mb_cache_list);
static LIST_HEAD(mb_cache_lru_list);
static DEFINE_SPINLOCK(mb_cache_spinlock);

static inline void
__spin_lock_mb_cache_entry(struct mb_cache_entry *ce)
{
	spin_lock(bgl_lock_ptr(mb_cache_bg_lock,
		MB_CACHE_ENTRY_LOCK_INDEX(ce)));
}

static inline void
__spin_unlock_mb_cache_entry(struct mb_cache_entry *ce)
{
	spin_unlock(bgl_lock_ptr(mb_cache_bg_lock,
		MB_CACHE_ENTRY_LOCK_INDEX(ce)));
}

static inline int
__mb_cache_entry_is_block_hashed(struct mb_cache_entry *ce)
{
	return !hlist_bl_unhashed(&ce->e_block_list);
}


static inline void
__mb_cache_entry_unhash_block(struct mb_cache_entry *ce)
{
	if (__mb_cache_entry_is_block_hashed(ce))
		hlist_bl_del_init(&ce->e_block_list);
}

static inline int
__mb_cache_entry_is_index_hashed(struct mb_cache_entry *ce)
{
	return !hlist_bl_unhashed(&ce->e_index.o_list);
}

static inline void
__mb_cache_entry_unhash_index(struct mb_cache_entry *ce)
{
	if (__mb_cache_entry_is_index_hashed(ce))
		hlist_bl_del_init(&ce->e_index.o_list);
}

/*
 * __mb_cache_entry_unhash_unlock()
 *
 * This function is called to unhash the entry from both the block and
 * index hash chains.
 * It assumes both hash chains are locked upon entry, and it unlocks both
 * of them upon exit.
 */
static inline void
__mb_cache_entry_unhash_unlock(struct mb_cache_entry *ce)
{
	__mb_cache_entry_unhash_index(ce);
	hlist_bl_unlock(ce->e_index_hash_p);
	__mb_cache_entry_unhash_block(ce);
	hlist_bl_unlock(ce->e_block_hash_p);
}

static void
__mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
{
	struct mb_cache *cache = ce->e_cache;

	mb_assert(!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt)));
	kmem_cache_free(cache->c_entry_cache, ce);
	atomic_dec(&cache->c_entry_count);
}

static void
__mb_cache_entry_release(struct mb_cache_entry *ce)
{
	/* First lock the entry to serialize access to its local data. */
	__spin_lock_mb_cache_entry(ce);
	/* Wake up all processes queuing for this cache entry. */
	if (ce->e_queued)
		wake_up_all(&mb_cache_queue);
	if (ce->e_used >= MB_CACHE_WRITER)
		ce->e_used -= MB_CACHE_WRITER;
	/*
	 * Make sure that all cache entries on lru_list have
	 * both e_used and e_queued equal to 0.
	 */
	ce->e_used--;
	if (!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))) {
		if (!__mb_cache_entry_is_block_hashed(ce)) {
			__spin_unlock_mb_cache_entry(ce);
			goto forget;
		}
		/*
		 * Need access to lru list, first drop entry lock,
		 * then reacquire the lock in the proper order.
		 */
		spin_lock(&mb_cache_spinlock);
		if (list_empty(&ce->e_lru_list))
			list_add_tail(&ce->e_lru_list, &mb_cache_lru_list);
		spin_unlock(&mb_cache_spinlock);
	}
	__spin_unlock_mb_cache_entry(ce);
	return;
forget:
	mb_assert(list_empty(&ce->e_lru_list));
	__mb_cache_entry_forget(ce, GFP_KERNEL);
}

/*
 * mb_cache_shrink_scan()  memory pressure callback
 *
 * This function is called by the kernel memory management when memory
 * gets low.
 *
 * @shrink: (ignored)
 * @sc: shrink_control passed from reclaim
 *
 * Returns the number of objects freed.
 */
static unsigned long
mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
{
	LIST_HEAD(free_list);
	struct mb_cache_entry *entry, *tmp;
	int nr_to_scan = sc->nr_to_scan;
	gfp_t gfp_mask = sc->gfp_mask;
	unsigned long freed = 0;

	mb_debug("trying to free %d entries", nr_to_scan);
	spin_lock(&mb_cache_spinlock);
	while ((nr_to_scan-- > 0) && !list_empty(&mb_cache_lru_list)) {
		struct mb_cache_entry *ce =
			list_entry(mb_cache_lru_list.next,
				struct mb_cache_entry, e_lru_list);
		list_del_init(&ce->e_lru_list);
		if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))
			continue;
		spin_unlock(&mb_cache_spinlock);
		/* Prevent any find or get operation on the entry */
		hlist_bl_lock(ce->e_block_hash_p);
		hlist_bl_lock(ce->e_index_hash_p);
		/* Ignore if it is touched by a find/get */
		if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt) ||
			!list_empty(&ce->e_lru_list)) {
			hlist_bl_unlock(ce->e_index_hash_p);
			hlist_bl_unlock(ce->e_block_hash_p);
			spin_lock(&mb_cache_spinlock);
			continue;
		}
		__mb_cache_entry_unhash_unlock(ce);
		list_add_tail(&ce->e_lru_list, &free_list);
		spin_lock(&mb_cache_spinlock);
	}
	spin_unlock(&mb_cache_spinlock);

	list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) {
		__mb_cache_entry_forget(entry, gfp_mask);
		freed++;
	}
	return freed;
}

static unsigned long
mb_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
	struct mb_cache *cache;
	unsigned long count = 0;

	spin_lock(&mb_cache_spinlock);
	list_for_each_entry(cache, &mb_cache_list, c_cache_list) {
		mb_debug("cache %s (%d)", cache->c_name,
			  atomic_read(&cache->c_entry_count));
		count += atomic_read(&cache->c_entry_count);
	}
	spin_unlock(&mb_cache_spinlock);

	return vfs_pressure_ratio(count);
}

static struct shrinker mb_cache_shrinker = {
	.count_objects = mb_cache_shrink_count,
	.scan_objects = mb_cache_shrink_scan,
	.seeks = DEFAULT_SEEKS,
};

/*
 * mb_cache_create()  create a new cache
 *
 * All entries in one cache are equal size. Cache entries may be from
 * multiple devices. If this is the first mbcache created, registers
 * the cache with kernel memory management. Returns NULL if no more
 * memory was available.
 *
 * @name: name of the cache (informal)
 * @bucket_bits: log2(number of hash buckets)
 */
struct mb_cache *
mb_cache_create(const char *name, int bucket_bits)
{
	int n, bucket_count = 1 << bucket_bits;
	struct mb_cache *cache = NULL;

	if (!mb_cache_bg_lock) {
		mb_cache_bg_lock = kmalloc(sizeof(struct blockgroup_lock),
			GFP_KERNEL);
		if (!mb_cache_bg_lock)
			return NULL;
		bgl_lock_init(mb_cache_bg_lock);
	}

	cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL);
	if (!cache)
		return NULL;
	cache->c_name = name;
	atomic_set(&cache->c_entry_count, 0);
	cache->c_bucket_bits = bucket_bits;
	cache->c_block_hash = kmalloc(bucket_count *
		sizeof(struct hlist_bl_head), GFP_KERNEL);
	if (!cache->c_block_hash)
		goto fail;
	for (n=0; n<bucket_count; n++)
		INIT_HLIST_BL_HEAD(&cache->c_block_hash[n]);
	cache->c_index_hash = kmalloc(bucket_count *
		sizeof(struct hlist_bl_head), GFP_KERNEL);
	if (!cache->c_index_hash)
		goto fail;
	for (n=0; n<bucket_count; n++)
		INIT_HLIST_BL_HEAD(&cache->c_index_hash[n]);
	if (!mb_cache_kmem_cache) {
		mb_cache_kmem_cache = kmem_cache_create(name,
			sizeof(struct mb_cache_entry), 0,
			SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
		if (!mb_cache_kmem_cache)
			goto fail2;
	}
	cache->c_entry_cache = mb_cache_kmem_cache;

	/*
	 * Set an upper limit on the number of cache entries so that the hash
	 * chains won't grow too long.
	 */
	cache->c_max_entries = bucket_count << 4;

	spin_lock(&mb_cache_spinlock);
	list_add(&cache->c_cache_list, &mb_cache_list);
	spin_unlock(&mb_cache_spinlock);
	return cache;

fail2:
	kfree(cache->c_index_hash);

fail:
	kfree(cache->c_block_hash);
	kfree(cache);
	return NULL;
}
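/*
 * Typical creation pattern (illustrative sketch only; "myfs" and the two
 * functions below are hypothetical, not part of mbcache): a filesystem
 * creates one cache at module init and destroys it with mb_cache_destroy()
 * at module exit, e.g. for extended attribute blocks:
 *
 *	static struct mb_cache *myfs_xattr_cache;
 *
 *	static int __init myfs_init_xattr_cache(void)
 *	{
 *		myfs_xattr_cache = mb_cache_create("myfs_xattr", 6);
 *		if (!myfs_xattr_cache)
 *			return -ENOMEM;
 *		return 0;
 *	}
 *
 *	static void myfs_exit_xattr_cache(void)
 *	{
 *		mb_cache_destroy(myfs_xattr_cache);
 *	}
 *
 * A bucket_bits of 6 gives 64 hash buckets and, via c_max_entries above,
 * an upper bound of 1024 entries for that cache.
 */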
/*
 * mb_cache_shrink()
 *
 * Removes all cache entries of a device from the cache. All cache entries
 * currently in use cannot be freed, and thus remain in the cache. All others
 * are freed.
 *
 * @bdev: which device's cache entries to shrink
 */
void
mb_cache_shrink(struct block_device *bdev)
{
	LIST_HEAD(free_list);
	struct list_head *l;
	struct mb_cache_entry *ce, *tmp;

	l = &mb_cache_lru_list;
	spin_lock(&mb_cache_spinlock);
	while (!list_is_last(l, &mb_cache_lru_list)) {
		l = l->next;
		ce = list_entry(l, struct mb_cache_entry, e_lru_list);
		if (ce->e_bdev == bdev) {
			list_del_init(&ce->e_lru_list);
			if (ce->e_used || ce->e_queued ||
				atomic_read(&ce->e_refcnt))
				continue;
			spin_unlock(&mb_cache_spinlock);
			/*
			 * Prevent any find or get operation on the entry.
			 */
			hlist_bl_lock(ce->e_block_hash_p);
			hlist_bl_lock(ce->e_index_hash_p);
			/* Ignore if it is touched by a find/get */
			if (ce->e_used || ce->e_queued ||
				atomic_read(&ce->e_refcnt) ||
				!list_empty(&ce->e_lru_list)) {
				hlist_bl_unlock(ce->e_index_hash_p);
				hlist_bl_unlock(ce->e_block_hash_p);
				l = &mb_cache_lru_list;
				spin_lock(&mb_cache_spinlock);
				continue;
			}
			__mb_cache_entry_unhash_unlock(ce);
			mb_assert(!(ce->e_used || ce->e_queued ||
				atomic_read(&ce->e_refcnt)));
			list_add_tail(&ce->e_lru_list, &free_list);
			l = &mb_cache_lru_list;
			spin_lock(&mb_cache_spinlock);
		}
	}
	spin_unlock(&mb_cache_spinlock);

	list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) {
		__mb_cache_entry_forget(ce, GFP_KERNEL);
	}
}
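/*
 * Illustrative use (hypothetical caller, not part of mbcache): a filesystem
 * would typically call this when it stops using a device, e.g. from its
 * superblock teardown path, so that no stale entries for that device linger
 * on the global lru list:
 *
 *	mb_cache_shrink(sb->s_bdev);
 */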
/*
 * mb_cache_destroy()
 *
 * Shrinks the cache to its minimum possible size (hopefully 0 entries),
 * and then destroys it. If this was the last mbcache, un-registers the
 * mbcache from kernel memory management.
 */
void
mb_cache_destroy(struct mb_cache *cache)
{
	LIST_HEAD(free_list);
	struct mb_cache_entry *ce, *tmp;

	spin_lock(&mb_cache_spinlock);
	list_for_each_entry_safe(ce, tmp, &mb_cache_lru_list, e_lru_list) {
		if (ce->e_cache == cache)
			list_move_tail(&ce->e_lru_list, &free_list);
	}
	list_del(&cache->c_cache_list);
	spin_unlock(&mb_cache_spinlock);

	list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) {
		list_del_init(&ce->e_lru_list);
		/*
		 * Prevent any find or get operation on the entry.
		 */
		hlist_bl_lock(ce->e_block_hash_p);
		hlist_bl_lock(ce->e_index_hash_p);
		mb_assert(!(ce->e_used || ce->e_queued ||
			atomic_read(&ce->e_refcnt)));
		__mb_cache_entry_unhash_unlock(ce);
		__mb_cache_entry_forget(ce, GFP_KERNEL);
	}

	if (atomic_read(&cache->c_entry_count) > 0) {
		mb_error("cache %s: %d orphaned entries",
			  cache->c_name,
			  atomic_read(&cache->c_entry_count));
	}

	if (list_empty(&mb_cache_list)) {
		kmem_cache_destroy(mb_cache_kmem_cache);
		mb_cache_kmem_cache = NULL;
	}
	kfree(cache->c_index_hash);
	kfree(cache->c_block_hash);
	kfree(cache);
}

/*
 * mb_cache_entry_alloc()
 *
 * Allocates a new cache entry. The new entry will not be valid initially,
 * and thus cannot be looked up yet. It should be filled with data, and
 * then inserted into the cache using mb_cache_entry_insert(). Returns NULL
 * if no more memory was available.
 */
struct mb_cache_entry *
mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
{
	struct mb_cache_entry *ce;

	if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) {
		struct list_head *l;

		l = &mb_cache_lru_list;
		spin_lock(&mb_cache_spinlock);
		while (!list_is_last(l, &mb_cache_lru_list)) {
			l = l->next;
			ce = list_entry(l, struct mb_cache_entry, e_lru_list);
			if (ce->e_cache == cache) {
				list_del_init(&ce->e_lru_list);
				if (ce->e_used || ce->e_queued ||
					atomic_read(&ce->e_refcnt))
					continue;
				spin_unlock(&mb_cache_spinlock);
				/*
				 * Prevent any find or get operation on the
				 * entry.
				 */
				hlist_bl_lock(ce->e_block_hash_p);
				hlist_bl_lock(ce->e_index_hash_p);
				/* Ignore if it is touched by a find/get */
				if (ce->e_used || ce->e_queued ||
					atomic_read(&ce->e_refcnt) ||
					!list_empty(&ce->e_lru_list)) {
					hlist_bl_unlock(ce->e_index_hash_p);
					hlist_bl_unlock(ce->e_block_hash_p);
					l = &mb_cache_lru_list;
					spin_lock(&mb_cache_spinlock);
					continue;
				}
				mb_assert(list_empty(&ce->e_lru_list));
				mb_assert(!(ce->e_used || ce->e_queued ||
					atomic_read(&ce->e_refcnt)));
				__mb_cache_entry_unhash_unlock(ce);
				goto found;
			}
		}
		spin_unlock(&mb_cache_spinlock);
	}

	ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags);
	if (!ce)
		return NULL;
	atomic_inc(&cache->c_entry_count);
	INIT_LIST_HEAD(&ce->e_lru_list);
	INIT_HLIST_BL_NODE(&ce->e_block_list);
	INIT_HLIST_BL_NODE(&ce->e_index.o_list);
	ce->e_cache = cache;
	ce->e_queued = 0;
	atomic_set(&ce->e_refcnt, 0);
found:
	ce->e_block_hash_p = &cache->c_block_hash[0];
	ce->e_index_hash_p = &cache->c_index_hash[0];
	ce->e_used = 1 + MB_CACHE_WRITER;
	return ce;
}
/*
 * mb_cache_entry_insert()
 *
 * Inserts an entry that was allocated using mb_cache_entry_alloc() into
 * the cache. After this, the cache entry can be looked up, but is not yet
 * in the lru list as the caller still holds a handle to it. Returns 0 on
 * success, or -EBUSY if a cache entry for that device + block exists
 * already (this may happen after a failed lookup, but when another process
 * has inserted the same cache entry in the meantime).
 *
 * @bdev: device the cache entry belongs to
 * @block: block number
 * @key: lookup key
 */
int
mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev,
		      sector_t block, unsigned int key)
{
	struct mb_cache *cache = ce->e_cache;
	unsigned int bucket;
	struct hlist_bl_node *l;
	struct hlist_bl_head *block_hash_p;
	struct hlist_bl_head *index_hash_p;
	struct mb_cache_entry *lce;

	mb_assert(ce);
	bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
			   cache->c_bucket_bits);
	block_hash_p = &cache->c_block_hash[bucket];
	hlist_bl_lock(block_hash_p);
	hlist_bl_for_each_entry(lce, l, block_hash_p, e_block_list) {
		if (lce->e_bdev == bdev && lce->e_block == block) {
			hlist_bl_unlock(block_hash_p);
			return -EBUSY;
		}
	}
	mb_assert(!__mb_cache_entry_is_block_hashed(ce));
	__mb_cache_entry_unhash_block(ce);
	__mb_cache_entry_unhash_index(ce);
	ce->e_bdev = bdev;
	ce->e_block = block;
	ce->e_block_hash_p = block_hash_p;
	ce->e_index.o_key = key;
	hlist_bl_add_head(&ce->e_block_list, block_hash_p);
	hlist_bl_unlock(block_hash_p);
	bucket = hash_long(key, cache->c_bucket_bits);
	index_hash_p = &cache->c_index_hash[bucket];
	hlist_bl_lock(index_hash_p);
	ce->e_index_hash_p = index_hash_p;
	hlist_bl_add_head(&ce->e_index.o_list, index_hash_p);
	hlist_bl_unlock(index_hash_p);
	return 0;
}
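/*
 * Illustrative life cycle of an entry (sketch only; myfs_xattr_cache, bdev,
 * block and hash are hypothetical caller state, not part of mbcache). The
 * handle returned by mb_cache_entry_alloc() is exclusive, so it must be
 * released once the caller is done with it; -EBUSY from the insert means
 * another thread inserted an entry for the same device and block first, in
 * which case the release frees the never-hashed entry:
 *
 *	struct mb_cache_entry *ce;
 *	int error;
 *
 *	ce = mb_cache_entry_alloc(myfs_xattr_cache, GFP_NOFS);
 *	if (ce) {
 *		error = mb_cache_entry_insert(ce, bdev, block, hash);
 *		mb_cache_entry_release(ce);
 *	}
 */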
/*
 * mb_cache_entry_release()
 *
 * Release a handle to a cache entry. When the last handle to a cache entry
 * is released it is either freed (if it is invalid) or otherwise inserted
 * into the lru list.
 */
void
mb_cache_entry_release(struct mb_cache_entry *ce)
{
	__mb_cache_entry_release(ce);
}


/*
 * mb_cache_entry_free()
 *
 * Removes the entry from both hash chains, making it invalid, and then
 * releases the caller's handle to it. The entry is freed once the last
 * handle is gone.
 */
void
mb_cache_entry_free(struct mb_cache_entry *ce)
{
	mb_assert(ce);
	mb_assert(list_empty(&ce->e_lru_list));
	hlist_bl_lock(ce->e_index_hash_p);
	__mb_cache_entry_unhash_index(ce);
	hlist_bl_unlock(ce->e_index_hash_p);
	hlist_bl_lock(ce->e_block_hash_p);
	__mb_cache_entry_unhash_block(ce);
	hlist_bl_unlock(ce->e_block_hash_p);
	__mb_cache_entry_release(ce);
}


/*
 * mb_cache_entry_get()
 *
 * Get a cache entry by device / block number. (There can only be one entry
 * in the cache per device and block.) Returns NULL if no such cache entry
 * exists. The returned cache entry is locked for exclusive access ("single
 * writer").
 */
struct mb_cache_entry *
mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev,
		   sector_t block)
{
	unsigned int bucket;
	struct hlist_bl_node *l;
	struct mb_cache_entry *ce;
	struct hlist_bl_head *block_hash_p;

	bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
			   cache->c_bucket_bits);
	block_hash_p = &cache->c_block_hash[bucket];
	/* First serialize access to the block corresponding hash chain. */
	hlist_bl_lock(block_hash_p);
	hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) {
		mb_assert(ce->e_block_hash_p == block_hash_p);
		if (ce->e_bdev == bdev && ce->e_block == block) {
			/*
			 * Prevent a free from removing the entry.
			 */
			atomic_inc(&ce->e_refcnt);
			hlist_bl_unlock(block_hash_p);
			__spin_lock_mb_cache_entry(ce);
			atomic_dec(&ce->e_refcnt);
			if (ce->e_used > 0) {
				DEFINE_WAIT(wait);
				while (ce->e_used > 0) {
					ce->e_queued++;
					prepare_to_wait(&mb_cache_queue, &wait,
							TASK_UNINTERRUPTIBLE);
					__spin_unlock_mb_cache_entry(ce);
					schedule();
					__spin_lock_mb_cache_entry(ce);
					ce->e_queued--;
				}
				finish_wait(&mb_cache_queue, &wait);
			}
			ce->e_used += 1 + MB_CACHE_WRITER;
			__spin_unlock_mb_cache_entry(ce);

			if (!list_empty(&ce->e_lru_list)) {
				spin_lock(&mb_cache_spinlock);
				list_del_init(&ce->e_lru_list);
				spin_unlock(&mb_cache_spinlock);
			}
			if (!__mb_cache_entry_is_block_hashed(ce)) {
				__mb_cache_entry_release(ce);
				return NULL;
			}
			return ce;
		}
	}
	hlist_bl_unlock(block_hash_p);
	return NULL;
}
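/*
 * Illustrative invalidation (sketch only; myfs_xattr_cache, bdev and block
 * are hypothetical caller state, not part of mbcache): when a block that
 * may be cached is reused or freed on disk, the caller looks the entry up
 * by device and block and, while holding the exclusive handle, removes it
 * from the cache:
 *
 *	struct mb_cache_entry *ce;
 *
 *	ce = mb_cache_entry_get(myfs_xattr_cache, bdev, block);
 *	if (ce)
 *		mb_cache_entry_free(ce);
 *
 * mb_cache_entry_free() drops the handle as well, so no separate
 * mb_cache_entry_release() is needed afterwards.
 */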
#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)

static struct mb_cache_entry *
__mb_cache_entry_find(struct hlist_bl_node *l, struct hlist_bl_head *head,
		      struct block_device *bdev, unsigned int key)
{

	/* The index hash chain is already acquired by the caller. */
	while (l != NULL) {
		struct mb_cache_entry *ce =
			hlist_bl_entry(l, struct mb_cache_entry,
				e_index.o_list);
		mb_assert(ce->e_index_hash_p == head);
		if (ce->e_bdev == bdev && ce->e_index.o_key == key) {
			/*
			 * Prevent a free from removing the entry.
			 */
			atomic_inc(&ce->e_refcnt);
			hlist_bl_unlock(head);
			__spin_lock_mb_cache_entry(ce);
			atomic_dec(&ce->e_refcnt);
			ce->e_used++;
			/* Incrementing before holding the lock gives readers
			   priority over writers. */
			if (ce->e_used >= MB_CACHE_WRITER) {
				DEFINE_WAIT(wait);

				while (ce->e_used >= MB_CACHE_WRITER) {
					ce->e_queued++;
					prepare_to_wait(&mb_cache_queue, &wait,
							TASK_UNINTERRUPTIBLE);
					__spin_unlock_mb_cache_entry(ce);
					schedule();
					__spin_lock_mb_cache_entry(ce);
					ce->e_queued--;
				}
				finish_wait(&mb_cache_queue, &wait);
			}
			__spin_unlock_mb_cache_entry(ce);
			if (!list_empty(&ce->e_lru_list)) {
				spin_lock(&mb_cache_spinlock);
				list_del_init(&ce->e_lru_list);
				spin_unlock(&mb_cache_spinlock);
			}
			if (!__mb_cache_entry_is_block_hashed(ce)) {
				__mb_cache_entry_release(ce);
				return ERR_PTR(-EAGAIN);
			}
			return ce;
		}
		l = l->next;
	}
	hlist_bl_unlock(head);
	return NULL;
}


/*
 * mb_cache_entry_find_first()
 *
 * Find the first cache entry on a given device with a certain key in
 * an additional index. Additional matches can be found with
 * mb_cache_entry_find_next(). Returns NULL if no match was found. The
 * returned cache entry is locked for shared access ("multiple readers").
 *
 * @cache: the cache to search
 * @bdev: the device the cache entry should belong to
 * @key: the key in the index
 */
struct mb_cache_entry *
mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *bdev,
			  unsigned int key)
{
	unsigned int bucket = hash_long(key, cache->c_bucket_bits);
	struct hlist_bl_node *l;
	struct mb_cache_entry *ce = NULL;
	struct hlist_bl_head *index_hash_p;

	index_hash_p = &cache->c_index_hash[bucket];
	hlist_bl_lock(index_hash_p);
	if (!hlist_bl_empty(index_hash_p)) {
		l = hlist_bl_first(index_hash_p);
		ce = __mb_cache_entry_find(l, index_hash_p, bdev, key);
	} else
		hlist_bl_unlock(index_hash_p);
	return ce;
}


/*
 * mb_cache_entry_find_next()
 *
 * Find the next cache entry on a given device with a certain key in an
 * additional index. Returns NULL if no match could be found. The previous
 * entry is automatically released, so that mb_cache_entry_find_next() can
 * be called like this:
 *
 * entry = mb_cache_entry_find_first();
 * while (entry) {
 * 	...
 *	entry = mb_cache_entry_find_next(entry, ...);
 * }
 *
 * @prev: The previous match
 * @bdev: the device the cache entry should belong to
 * @key: the key in the index
 */
struct mb_cache_entry *
mb_cache_entry_find_next(struct mb_cache_entry *prev,
			 struct block_device *bdev, unsigned int key)
{
	struct mb_cache *cache = prev->e_cache;
	unsigned int bucket = hash_long(key, cache->c_bucket_bits);
	struct hlist_bl_node *l;
	struct mb_cache_entry *ce;
	struct hlist_bl_head *index_hash_p;

	index_hash_p = &cache->c_index_hash[bucket];
	mb_assert(prev->e_index_hash_p == index_hash_p);
	hlist_bl_lock(index_hash_p);
	mb_assert(!hlist_bl_empty(index_hash_p));
	l = prev->e_index.o_list.next;
	ce = __mb_cache_entry_find(l, index_hash_p, bdev, key);
	__mb_cache_entry_release(prev);
	return ce;
}

#endif  /* !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) */

static int __init init_mbcache(void)
{
	register_shrinker(&mb_cache_shrinker);
	return 0;
}

static void __exit exit_mbcache(void)
{
	unregister_shrinker(&mb_cache_shrinker);
}

module_init(init_mbcache)
module_exit(exit_mbcache)