1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 lru_cache.c 4 5 This file is part of DRBD by Philipp Reisner and Lars Ellenberg. 6 7 Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. 8 Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>. 9 Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. 10 11 12 */ 13 14 #include <linux/module.h> 15 #include <linux/bitops.h> 16 #include <linux/slab.h> 17 #include <linux/string.h> /* for memset */ 18 #include <linux/seq_file.h> /* for seq_printf */ 19 #include <linux/lru_cache.h> 20 21 MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, " 22 "Lars Ellenberg <lars@linbit.com>"); 23 MODULE_DESCRIPTION("lru_cache - Track sets of hot objects"); 24 MODULE_LICENSE("GPL"); 25 26 /* this is developers aid only. 27 * it catches concurrent access (lack of locking on the users part) */ 28 #define PARANOIA_ENTRY() do { \ 29 BUG_ON(!lc); \ 30 BUG_ON(!lc->nr_elements); \ 31 BUG_ON(test_and_set_bit(__LC_PARANOIA, &lc->flags)); \ 32 } while (0) 33 34 #define RETURN(x...) do { \ 35 clear_bit_unlock(__LC_PARANOIA, &lc->flags); \ 36 return x ; } while (0) 37 38 /* BUG() if e is not one of the elements tracked by lc */ 39 #define PARANOIA_LC_ELEMENT(lc, e) do { \ 40 struct lru_cache *lc_ = (lc); \ 41 struct lc_element *e_ = (e); \ 42 unsigned i = e_->lc_index; \ 43 BUG_ON(i >= lc_->nr_elements); \ 44 BUG_ON(lc_->lc_element[i] != e_); } while (0) 45 46 47 /* We need to atomically 48 * - try to grab the lock (set LC_LOCKED) 49 * - only if there is no pending transaction 50 * (neither LC_DIRTY nor LC_STARVING is set) 51 * Because of PARANOIA_ENTRY() above abusing lc->flags as well, 52 * it is not sufficient to just say 53 * return 0 == cmpxchg(&lc->flags, 0, LC_LOCKED); 54 */ 55 int lc_try_lock(struct lru_cache *lc) 56 { 57 unsigned long val; 58 do { 59 val = cmpxchg(&lc->flags, 0, LC_LOCKED); 60 } while (unlikely (val == LC_PARANOIA)); 61 /* Spin until no-one is inside a PARANOIA_ENTRY()/RETURN() section. */ 62 return 0 == val; 63 } 64 65 /** 66 * lc_create - prepares to track objects in an active set 67 * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details 68 * @cache: cache root pointer 69 * @max_pending_changes: maximum changes to accumulate until a transaction is required 70 * @e_count: number of elements allowed to be active simultaneously 71 * @e_size: size of the tracked objects 72 * @e_off: offset to the &struct lc_element member in a tracked object 73 * 74 * Returns a pointer to a newly initialized struct lru_cache on success, 75 * or NULL on (allocation) failure. 76 */ 77 struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, 78 unsigned max_pending_changes, 79 unsigned e_count, size_t e_size, size_t e_off) 80 { 81 struct hlist_head *slot = NULL; 82 struct lc_element **element = NULL; 83 struct lru_cache *lc; 84 struct lc_element *e; 85 unsigned cache_obj_size = kmem_cache_size(cache); 86 unsigned i; 87 88 WARN_ON(cache_obj_size < e_size); 89 if (cache_obj_size < e_size) 90 return NULL; 91 92 /* e_count too big; would probably fail the allocation below anyways. 93 * for typical use cases, e_count should be few thousand at most. */ 94 if (e_count > LC_MAX_ACTIVE) 95 return NULL; 96 97 slot = kcalloc(e_count, sizeof(struct hlist_head), GFP_KERNEL); 98 if (!slot) 99 goto out_fail; 100 element = kcalloc(e_count, sizeof(struct lc_element *), GFP_KERNEL); 101 if (!element) 102 goto out_fail; 103 104 lc = kzalloc(sizeof(*lc), GFP_KERNEL); 105 if (!lc) 106 goto out_fail; 107 108 INIT_LIST_HEAD(&lc->in_use); 109 INIT_LIST_HEAD(&lc->lru); 110 INIT_LIST_HEAD(&lc->free); 111 INIT_LIST_HEAD(&lc->to_be_changed); 112 113 lc->name = name; 114 lc->element_size = e_size; 115 lc->element_off = e_off; 116 lc->nr_elements = e_count; 117 lc->max_pending_changes = max_pending_changes; 118 lc->lc_cache = cache; 119 lc->lc_element = element; 120 lc->lc_slot = slot; 121 122 /* preallocate all objects */ 123 for (i = 0; i < e_count; i++) { 124 void *p = kmem_cache_alloc(cache, GFP_KERNEL); 125 if (!p) 126 break; 127 memset(p, 0, lc->element_size); 128 e = p + e_off; 129 e->lc_index = i; 130 e->lc_number = LC_FREE; 131 e->lc_new_number = LC_FREE; 132 list_add(&e->list, &lc->free); 133 element[i] = e; 134 } 135 if (i == e_count) 136 return lc; 137 138 /* else: could not allocate all elements, give up */ 139 while (i) { 140 void *p = element[--i]; 141 kmem_cache_free(cache, p - e_off); 142 } 143 kfree(lc); 144 out_fail: 145 kfree(element); 146 kfree(slot); 147 return NULL; 148 } 149 150 static void lc_free_by_index(struct lru_cache *lc, unsigned i) 151 { 152 void *p = lc->lc_element[i]; 153 WARN_ON(!p); 154 if (p) { 155 p -= lc->element_off; 156 kmem_cache_free(lc->lc_cache, p); 157 } 158 } 159 160 /** 161 * lc_destroy - frees memory allocated by lc_create() 162 * @lc: the lru cache to destroy 163 */ 164 void lc_destroy(struct lru_cache *lc) 165 { 166 unsigned i; 167 if (!lc) 168 return; 169 for (i = 0; i < lc->nr_elements; i++) 170 lc_free_by_index(lc, i); 171 kfree(lc->lc_element); 172 kfree(lc->lc_slot); 173 kfree(lc); 174 } 175 176 /** 177 * lc_reset - does a full reset for @lc and the hash table slots. 178 * @lc: the lru cache to operate on 179 * 180 * It is roughly the equivalent of re-allocating a fresh lru_cache object, 181 * basically a short cut to lc_destroy(lc); lc = lc_create(...); 182 */ 183 void lc_reset(struct lru_cache *lc) 184 { 185 unsigned i; 186 187 INIT_LIST_HEAD(&lc->in_use); 188 INIT_LIST_HEAD(&lc->lru); 189 INIT_LIST_HEAD(&lc->free); 190 INIT_LIST_HEAD(&lc->to_be_changed); 191 lc->used = 0; 192 lc->hits = 0; 193 lc->misses = 0; 194 lc->starving = 0; 195 lc->locked = 0; 196 lc->changed = 0; 197 lc->pending_changes = 0; 198 lc->flags = 0; 199 memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements); 200 201 for (i = 0; i < lc->nr_elements; i++) { 202 struct lc_element *e = lc->lc_element[i]; 203 void *p = e; 204 p -= lc->element_off; 205 memset(p, 0, lc->element_size); 206 /* re-init it */ 207 e->lc_index = i; 208 e->lc_number = LC_FREE; 209 e->lc_new_number = LC_FREE; 210 list_add(&e->list, &lc->free); 211 } 212 } 213 214 /** 215 * lc_seq_printf_stats - print stats about @lc into @seq 216 * @seq: the seq_file to print into 217 * @lc: the lru cache to print statistics of 218 */ 219 void lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) 220 { 221 /* NOTE: 222 * total calls to lc_get are 223 * (starving + hits + misses) 224 * misses include "locked" count (update from an other thread in 225 * progress) and "changed", when this in fact lead to an successful 226 * update of the cache. 227 */ 228 seq_printf(seq, "\t%s: used:%u/%u hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n", 229 lc->name, lc->used, lc->nr_elements, 230 lc->hits, lc->misses, lc->starving, lc->locked, lc->changed); 231 } 232 233 static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) 234 { 235 return lc->lc_slot + (enr % lc->nr_elements); 236 } 237 238 239 static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr, 240 bool include_changing) 241 { 242 struct lc_element *e; 243 244 BUG_ON(!lc); 245 BUG_ON(!lc->nr_elements); 246 hlist_for_each_entry(e, lc_hash_slot(lc, enr), collision) { 247 /* "about to be changed" elements, pending transaction commit, 248 * are hashed by their "new number". "Normal" elements have 249 * lc_number == lc_new_number. */ 250 if (e->lc_new_number != enr) 251 continue; 252 if (e->lc_new_number == e->lc_number || include_changing) 253 return e; 254 break; 255 } 256 return NULL; 257 } 258 259 /** 260 * lc_find - find element by label, if present in the hash table 261 * @lc: The lru_cache object 262 * @enr: element number 263 * 264 * Returns the pointer to an element, if the element with the requested 265 * "label" or element number is present in the hash table, 266 * or NULL if not found. Does not change the refcnt. 267 * Ignores elements that are "about to be used", i.e. not yet in the active 268 * set, but still pending transaction commit. 269 */ 270 struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) 271 { 272 return __lc_find(lc, enr, 0); 273 } 274 275 /** 276 * lc_is_used - find element by label 277 * @lc: The lru_cache object 278 * @enr: element number 279 * 280 * Returns true, if the element with the requested "label" or element number is 281 * present in the hash table, and is used (refcnt > 0). 282 * Also finds elements that are not _currently_ used but only "about to be 283 * used", i.e. on the "to_be_changed" list, pending transaction commit. 284 */ 285 bool lc_is_used(struct lru_cache *lc, unsigned int enr) 286 { 287 struct lc_element *e = __lc_find(lc, enr, 1); 288 return e && e->refcnt; 289 } 290 291 /** 292 * lc_del - removes an element from the cache 293 * @lc: The lru_cache object 294 * @e: The element to remove 295 * 296 * @e must be unused (refcnt == 0). Moves @e from "lru" to "free" list, 297 * sets @e->enr to %LC_FREE. 298 */ 299 void lc_del(struct lru_cache *lc, struct lc_element *e) 300 { 301 PARANOIA_ENTRY(); 302 PARANOIA_LC_ELEMENT(lc, e); 303 BUG_ON(e->refcnt); 304 305 e->lc_number = e->lc_new_number = LC_FREE; 306 hlist_del_init(&e->collision); 307 list_move(&e->list, &lc->free); 308 RETURN(); 309 } 310 311 static struct lc_element *lc_prepare_for_change(struct lru_cache *lc, unsigned new_number) 312 { 313 struct list_head *n; 314 struct lc_element *e; 315 316 if (!list_empty(&lc->free)) 317 n = lc->free.next; 318 else if (!list_empty(&lc->lru)) 319 n = lc->lru.prev; 320 else 321 return NULL; 322 323 e = list_entry(n, struct lc_element, list); 324 PARANOIA_LC_ELEMENT(lc, e); 325 326 e->lc_new_number = new_number; 327 if (!hlist_unhashed(&e->collision)) 328 __hlist_del(&e->collision); 329 hlist_add_head(&e->collision, lc_hash_slot(lc, new_number)); 330 list_move(&e->list, &lc->to_be_changed); 331 332 return e; 333 } 334 335 static int lc_unused_element_available(struct lru_cache *lc) 336 { 337 if (!list_empty(&lc->free)) 338 return 1; /* something on the free list */ 339 if (!list_empty(&lc->lru)) 340 return 1; /* something to evict */ 341 342 return 0; 343 } 344 345 /* used as internal flags to __lc_get */ 346 enum { 347 LC_GET_MAY_CHANGE = 1, 348 LC_GET_MAY_USE_UNCOMMITTED = 2, 349 }; 350 351 static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, unsigned int flags) 352 { 353 struct lc_element *e; 354 355 PARANOIA_ENTRY(); 356 if (test_bit(__LC_STARVING, &lc->flags)) { 357 ++lc->starving; 358 RETURN(NULL); 359 } 360 361 e = __lc_find(lc, enr, 1); 362 /* if lc_new_number != lc_number, 363 * this enr is currently being pulled in already, 364 * and will be available once the pending transaction 365 * has been committed. */ 366 if (e) { 367 if (e->lc_new_number != e->lc_number) { 368 /* It has been found above, but on the "to_be_changed" 369 * list, not yet committed. Don't pull it in twice, 370 * wait for the transaction, then try again... 371 */ 372 if (!(flags & LC_GET_MAY_USE_UNCOMMITTED)) 373 RETURN(NULL); 374 /* ... unless the caller is aware of the implications, 375 * probably preparing a cumulative transaction. */ 376 ++e->refcnt; 377 ++lc->hits; 378 RETURN(e); 379 } 380 /* else: lc_new_number == lc_number; a real hit. */ 381 ++lc->hits; 382 if (e->refcnt++ == 0) 383 lc->used++; 384 list_move(&e->list, &lc->in_use); /* Not evictable... */ 385 RETURN(e); 386 } 387 /* e == NULL */ 388 389 ++lc->misses; 390 if (!(flags & LC_GET_MAY_CHANGE)) 391 RETURN(NULL); 392 393 /* To avoid races with lc_try_lock(), first, mark us dirty 394 * (using test_and_set_bit, as it implies memory barriers), ... */ 395 test_and_set_bit(__LC_DIRTY, &lc->flags); 396 397 /* ... only then check if it is locked anyways. If lc_unlock clears 398 * the dirty bit again, that's not a problem, we will come here again. 399 */ 400 if (test_bit(__LC_LOCKED, &lc->flags)) { 401 ++lc->locked; 402 RETURN(NULL); 403 } 404 405 /* In case there is nothing available and we can not kick out 406 * the LRU element, we have to wait ... 407 */ 408 if (!lc_unused_element_available(lc)) { 409 set_bit(__LC_STARVING, &lc->flags); 410 RETURN(NULL); 411 } 412 413 /* It was not present in the active set. We are going to recycle an 414 * unused (or even "free") element, but we won't accumulate more than 415 * max_pending_changes changes. */ 416 if (lc->pending_changes >= lc->max_pending_changes) 417 RETURN(NULL); 418 419 e = lc_prepare_for_change(lc, enr); 420 BUG_ON(!e); 421 422 clear_bit(__LC_STARVING, &lc->flags); 423 BUG_ON(++e->refcnt != 1); 424 lc->used++; 425 lc->pending_changes++; 426 427 RETURN(e); 428 } 429 430 /** 431 * lc_get - get element by label, maybe change the active set 432 * @lc: the lru cache to operate on 433 * @enr: the label to look up 434 * 435 * Finds an element in the cache, increases its usage count, 436 * "touches" and returns it. 437 * 438 * In case the requested number is not present, it needs to be added to the 439 * cache. Therefore it is possible that an other element becomes evicted from 440 * the cache. In either case, the user is notified so he is able to e.g. keep 441 * a persistent log of the cache changes, and therefore the objects in use. 442 * 443 * Return values: 444 * NULL 445 * The cache was marked %LC_STARVING, 446 * or the requested label was not in the active set 447 * and a changing transaction is still pending (@lc was marked %LC_DIRTY). 448 * Or no unused or free element could be recycled (@lc will be marked as 449 * %LC_STARVING, blocking further lc_get() operations). 450 * 451 * pointer to the element with the REQUESTED element number. 452 * In this case, it can be used right away 453 * 454 * pointer to an UNUSED element with some different element number, 455 * where that different number may also be %LC_FREE. 456 * 457 * In this case, the cache is marked %LC_DIRTY, 458 * so lc_try_lock() will no longer succeed. 459 * The returned element pointer is moved to the "to_be_changed" list, 460 * and registered with the new element number on the hash collision chains, 461 * so it is possible to pick it up from lc_is_used(). 462 * Up to "max_pending_changes" (see lc_create()) can be accumulated. 463 * The user now should do whatever housekeeping is necessary, 464 * typically serialize on lc_try_lock_for_transaction(), then call 465 * lc_committed(lc) and lc_unlock(), to finish the change. 466 * 467 * NOTE: The user needs to check the lc_number on EACH use, so he recognizes 468 * any cache set change. 469 */ 470 struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) 471 { 472 return __lc_get(lc, enr, LC_GET_MAY_CHANGE); 473 } 474 475 /** 476 * lc_get_cumulative - like lc_get; also finds to-be-changed elements 477 * @lc: the lru cache to operate on 478 * @enr: the label to look up 479 * 480 * Unlike lc_get this also returns the element for @enr, if it is belonging to 481 * a pending transaction, so the return values are like for lc_get(), 482 * plus: 483 * 484 * pointer to an element already on the "to_be_changed" list. 485 * In this case, the cache was already marked %LC_DIRTY. 486 * 487 * Caller needs to make sure that the pending transaction is completed, 488 * before proceeding to actually use this element. 489 */ 490 struct lc_element *lc_get_cumulative(struct lru_cache *lc, unsigned int enr) 491 { 492 return __lc_get(lc, enr, LC_GET_MAY_CHANGE|LC_GET_MAY_USE_UNCOMMITTED); 493 } 494 495 /** 496 * lc_try_get - get element by label, if present; do not change the active set 497 * @lc: the lru cache to operate on 498 * @enr: the label to look up 499 * 500 * Finds an element in the cache, increases its usage count, 501 * "touches" and returns it. 502 * 503 * Return values: 504 * NULL 505 * The cache was marked %LC_STARVING, 506 * or the requested label was not in the active set 507 * 508 * pointer to the element with the REQUESTED element number. 509 * In this case, it can be used right away 510 */ 511 struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr) 512 { 513 return __lc_get(lc, enr, 0); 514 } 515 516 /** 517 * lc_committed - tell @lc that pending changes have been recorded 518 * @lc: the lru cache to operate on 519 * 520 * User is expected to serialize on explicit lc_try_lock_for_transaction() 521 * before the transaction is started, and later needs to lc_unlock() explicitly 522 * as well. 523 */ 524 void lc_committed(struct lru_cache *lc) 525 { 526 struct lc_element *e, *tmp; 527 528 PARANOIA_ENTRY(); 529 list_for_each_entry_safe(e, tmp, &lc->to_be_changed, list) { 530 /* count number of changes, not number of transactions */ 531 ++lc->changed; 532 e->lc_number = e->lc_new_number; 533 list_move(&e->list, &lc->in_use); 534 } 535 lc->pending_changes = 0; 536 RETURN(); 537 } 538 539 540 /** 541 * lc_put - give up refcnt of @e 542 * @lc: the lru cache to operate on 543 * @e: the element to put 544 * 545 * If refcnt reaches zero, the element is moved to the lru list, 546 * and a %LC_STARVING (if set) is cleared. 547 * Returns the new (post-decrement) refcnt. 548 */ 549 unsigned int lc_put(struct lru_cache *lc, struct lc_element *e) 550 { 551 PARANOIA_ENTRY(); 552 PARANOIA_LC_ELEMENT(lc, e); 553 BUG_ON(e->refcnt == 0); 554 BUG_ON(e->lc_number != e->lc_new_number); 555 if (--e->refcnt == 0) { 556 /* move it to the front of LRU. */ 557 list_move(&e->list, &lc->lru); 558 lc->used--; 559 clear_bit_unlock(__LC_STARVING, &lc->flags); 560 } 561 RETURN(e->refcnt); 562 } 563 564 /** 565 * lc_element_by_index 566 * @lc: the lru cache to operate on 567 * @i: the index of the element to return 568 */ 569 struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i) 570 { 571 BUG_ON(i >= lc->nr_elements); 572 BUG_ON(lc->lc_element[i] == NULL); 573 BUG_ON(lc->lc_element[i]->lc_index != i); 574 return lc->lc_element[i]; 575 } 576 577 /** 578 * lc_seq_dump_details - Dump a complete LRU cache to seq in textual form. 579 * @lc: the lru cache to operate on 580 * @seq: the &struct seq_file pointer to seq_printf into 581 * @utext: user supplied additional "heading" or other info 582 * @detail: function pointer the user may provide to dump further details 583 * of the object the lc_element is embedded in. May be NULL. 584 * Note: a leading space ' ' and trailing newline '\n' is implied. 585 */ 586 void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext, 587 void (*detail) (struct seq_file *, struct lc_element *)) 588 { 589 unsigned int nr_elements = lc->nr_elements; 590 struct lc_element *e; 591 int i; 592 593 seq_printf(seq, "\tnn: lc_number (new nr) refcnt %s\n ", utext); 594 for (i = 0; i < nr_elements; i++) { 595 e = lc_element_by_index(lc, i); 596 if (e->lc_number != e->lc_new_number) 597 seq_printf(seq, "\t%5d: %6d %8d %6d ", 598 i, e->lc_number, e->lc_new_number, e->refcnt); 599 else 600 seq_printf(seq, "\t%5d: %6d %-8s %6d ", 601 i, e->lc_number, "-\"-", e->refcnt); 602 if (detail) 603 detail(seq, e); 604 seq_putc(seq, '\n'); 605 } 606 } 607 608 EXPORT_SYMBOL(lc_create); 609 EXPORT_SYMBOL(lc_reset); 610 EXPORT_SYMBOL(lc_destroy); 611 EXPORT_SYMBOL(lc_del); 612 EXPORT_SYMBOL(lc_try_get); 613 EXPORT_SYMBOL(lc_find); 614 EXPORT_SYMBOL(lc_get); 615 EXPORT_SYMBOL(lc_put); 616 EXPORT_SYMBOL(lc_committed); 617 EXPORT_SYMBOL(lc_element_by_index); 618 EXPORT_SYMBOL(lc_seq_printf_stats); 619 EXPORT_SYMBOL(lc_seq_dump_details); 620 EXPORT_SYMBOL(lc_try_lock); 621 EXPORT_SYMBOL(lc_is_used); 622 EXPORT_SYMBOL(lc_get_cumulative); 623