// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>
#include "slab.h"
#include "internal.h"

#ifdef CONFIG_MEMCG
static LIST_HEAD(memcg_list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return lru->memcg_aware;
}

static void list_lru_register(struct list_lru *lru)
{
	if (!list_lru_memcg_aware(lru))
		return;

	mutex_lock(&list_lrus_mutex);
	list_add(&lru->list, &memcg_list_lrus);
	mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
	if (!list_lru_memcg_aware(lru))
		return;

	mutex_lock(&list_lrus_mutex);
	list_del(&lru->list);
	mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return lru->shrinker_id;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
	if (list_lru_memcg_aware(lru) && idx >= 0) {
		struct list_lru_memcg *mlru = xa_load(&lru->xa, idx);

		return mlru ? &mlru->node[nid] : NULL;
	}
	return &lru->node[nid].lru;
}

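/*
 * Lock the list_lru_one of @memcg on node @nid, retrying across reparenting.
 *
 * A reparented list_lru_one is marked dead by setting its nr_items to
 * LONG_MIN under its lock (see memcg_reparent_list_lru_one() below).  If the
 * target list is missing or dead, the caller either bails out (@skip_empty)
 * or retries with the parent memcg, which now holds the reparented items.
 * Returns the locked list, or NULL when @skip_empty and no live list exists.
 */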
static inline struct list_lru_one *
lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		       bool irq, bool skip_empty)
{
	struct list_lru_one *l;
	long nr_items;

	rcu_read_lock();
again:
	l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
	if (likely(l)) {
		if (irq)
			spin_lock_irq(&l->lock);
		else
			spin_lock(&l->lock);
		nr_items = READ_ONCE(l->nr_items);
		if (likely(nr_items != LONG_MIN)) {
			WARN_ON(nr_items < 0);
			rcu_read_unlock();
			return l;
		}
		if (irq)
			spin_unlock_irq(&l->lock);
		else
			spin_unlock(&l->lock);
	}
	/*
	 * Caller may simply bail out if raced with reparenting or
	 * may iterate through the list_lru and expect empty slots.
	 */
	if (skip_empty) {
		rcu_read_unlock();
		return NULL;
	}
	VM_WARN_ON(!css_is_dying(&memcg->css));
	memcg = parent_mem_cgroup(memcg);
	goto again;
}

static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
{
	if (irq_off)
		spin_unlock_irq(&l->lock);
	else
		spin_unlock(&l->lock);
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
	return &lru->node[nid].lru;
}

static inline struct list_lru_one *
lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		       bool irq, bool skip_empty)
{
	struct list_lru_one *l = &lru->node[nid].lru;

	if (irq)
		spin_lock_irq(&l->lock);
	else
		spin_lock(&l->lock);

	return l;
}

static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
{
	if (irq_off)
		spin_unlock_irq(&l->lock);
	else
		spin_unlock(&l->lock);
}
#endif /* CONFIG_MEMCG */

/* The caller must ensure the memcg lifetime. */
bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
		  struct mem_cgroup *memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
	if (!l)
		return false;
	if (list_empty(item)) {
		list_add_tail(item, &l->list);
		/* Set shrinker bit if the first element was added */
		if (!l->nr_items++)
			set_shrinker_bit(memcg, nid, lru_shrinker_id(lru));
		unlock_list_lru(l, false);
		atomic_long_inc(&nlru->nr_items);
		return true;
	}
	unlock_list_lru(l, false);
	return false;
}

bool list_lru_add_obj(struct list_lru *lru, struct list_head *item)
{
	bool ret;
	int nid = page_to_nid(virt_to_page(item));

	if (list_lru_memcg_aware(lru)) {
		rcu_read_lock();
		ret = list_lru_add(lru, item, nid, mem_cgroup_from_slab_obj(item));
		rcu_read_unlock();
	} else {
		ret = list_lru_add(lru, item, nid, NULL);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(list_lru_add_obj);
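
/*
 * Note: list_lru_add_obj() and list_lru_del_obj() derive the node id and the
 * memcg from the item itself via virt_to_page() and mem_cgroup_from_slab_obj(),
 * so the item must live in directly mapped memory (e.g. a slab object), not in
 * vmalloc space.  Callers that track nid/memcg themselves can use
 * list_lru_add()/list_lru_del() directly, but then must guarantee the memcg
 * lifetime across the call.
 */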

/* The caller must ensure the memcg lifetime. */
bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid,
		  struct mem_cgroup *memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
	if (!l)
		return false;
	if (!list_empty(item)) {
		list_del_init(item);
		l->nr_items--;
		unlock_list_lru(l, false);
		atomic_long_dec(&nlru->nr_items);
		return true;
	}
	unlock_list_lru(l, false);
	return false;
}

bool list_lru_del_obj(struct list_lru *lru, struct list_head *item)
{
	bool ret;
	int nid = page_to_nid(virt_to_page(item));

	if (list_lru_memcg_aware(lru)) {
		rcu_read_lock();
		ret = list_lru_del(lru, item, nid, mem_cgroup_from_slab_obj(item));
		rcu_read_unlock();
	} else {
		ret = list_lru_del(lru, item, nid, NULL);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(list_lru_del_obj);

void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
	list_del_init(item);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head)
{
	list_move(item, head);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	struct list_lru_one *l;
	long count;

	rcu_read_lock();
	l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
	count = l ? READ_ONCE(l->nr_items) : 0;
	rcu_read_unlock();

	if (unlikely(count < 0))
		count = 0;

	return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
	struct list_lru_node *nlru;

	nlru = &lru->node[nid];
	return atomic_long_read(&nlru->nr_items);
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

static unsigned long
__list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		    list_lru_walk_cb isolate, void *cb_arg,
		    unsigned long *nr_to_walk, bool irq_off)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l = NULL;
	struct list_head *item, *n;
	unsigned long isolated = 0;

restart:
	l = lock_list_lru_of_memcg(lru, nid, memcg, irq_off, true);
	if (!l)
		return isolated;
	list_for_each_safe(item, n, &l->list) {
		enum lru_status ret;

		/*
		 * decrement nr_to_walk first so that we don't livelock if we
		 * get stuck on large numbers of LRU_RETRY items
		 */
		if (!*nr_to_walk)
			break;
		--*nr_to_walk;

		ret = isolate(item, l, cb_arg);
		switch (ret) {
		/*
		 * LRU_RETRY, LRU_REMOVED_RETRY and LRU_STOP will drop the lru
		 * lock. List traversal will have to restart from scratch.
		 */
		case LRU_RETRY:
			goto restart;
		case LRU_REMOVED_RETRY:
			fallthrough;
		case LRU_REMOVED:
			isolated++;
			atomic_long_dec(&nlru->nr_items);
			if (ret == LRU_REMOVED_RETRY)
				goto restart;
			break;
		case LRU_ROTATE:
			list_move_tail(item, &l->list);
			break;
		case LRU_SKIP:
			break;
		case LRU_STOP:
			goto out;
		default:
			BUG();
		}
	}
	unlock_list_lru(l, irq_off);
out:
	return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		  list_lru_walk_cb isolate, void *cb_arg,
		  unsigned long *nr_to_walk)
{
	return __list_lru_walk_one(lru, nid, memcg, isolate,
				   cb_arg, nr_to_walk, false);
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

unsigned long
list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		      list_lru_walk_cb isolate, void *cb_arg,
		      unsigned long *nr_to_walk)
{
	return __list_lru_walk_one(lru, nid, memcg, isolate,
				   cb_arg, nr_to_walk, true);
}

unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
				 list_lru_walk_cb isolate, void *cb_arg,
				 unsigned long *nr_to_walk)
{
	long isolated = 0;

	isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
				      nr_to_walk);

#ifdef CONFIG_MEMCG
	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
		struct list_lru_memcg *mlru;
		struct mem_cgroup *memcg;
		unsigned long index;

		xa_for_each(&lru->xa, index, mlru) {
			rcu_read_lock();
			memcg = mem_cgroup_from_id(index);
			if (!mem_cgroup_tryget(memcg)) {
				rcu_read_unlock();
				continue;
			}
			rcu_read_unlock();
			isolated += __list_lru_walk_one(lru, nid, memcg,
							isolate, cb_arg,
							nr_to_walk, false);
			mem_cgroup_put(memcg);

			if (*nr_to_walk <= 0)
				break;
		}
	}
#endif

	return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);

static void init_one_lru(struct list_lru *lru, struct list_lru_one *l)
{
	INIT_LIST_HEAD(&l->list);
	spin_lock_init(&l->lock);
	l->nr_items = 0;
#ifdef CONFIG_LOCKDEP
	if (lru->key)
		lockdep_set_class(&l->lock, lru->key);
#endif
}

#ifdef CONFIG_MEMCG
static struct list_lru_memcg *memcg_init_list_lru_one(struct list_lru *lru, gfp_t gfp)
{
	int nid;
	struct list_lru_memcg *mlru;

	mlru = kmalloc(struct_size(mlru, node, nr_node_ids), gfp);
	if (!mlru)
		return NULL;

	for_each_node(nid)
		init_one_lru(lru, &mlru->node[nid]);

	return mlru;
}

static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	if (memcg_aware)
		xa_init_flags(&lru->xa, XA_FLAGS_LOCK_IRQ);
	lru->memcg_aware = memcg_aware;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
	XA_STATE(xas, &lru->xa, 0);
	struct list_lru_memcg *mlru;

	if (!list_lru_memcg_aware(lru))
		return;

	xas_lock_irq(&xas);
	xas_for_each(&xas, mlru, ULONG_MAX) {
		kfree(mlru);
		xas_store(&xas, NULL);
	}
	xas_unlock_irq(&xas);
}

static void memcg_reparent_list_lru_one(struct list_lru *lru, int nid,
					struct list_lru_one *src,
					struct mem_cgroup *dst_memcg)
{
	int dst_idx = dst_memcg->kmemcg_id;
	struct list_lru_one *dst;

	spin_lock_irq(&src->lock);
	dst = list_lru_from_memcg_idx(lru, nid, dst_idx);
	spin_lock_nested(&dst->lock, SINGLE_DEPTH_NESTING);

	list_splice_init(&src->list, &dst->list);
	if (src->nr_items) {
		dst->nr_items += src->nr_items;
		set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
	}
	/* Mark the list_lru_one dead */
	src->nr_items = LONG_MIN;

	spin_unlock(&dst->lock);
	spin_unlock_irq(&src->lock);
}

void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent)
{
	struct list_lru *lru;
	int i;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &memcg_list_lrus, list) {
		struct list_lru_memcg *mlru;
		XA_STATE(xas, &lru->xa, memcg->kmemcg_id);

		/*
		 * Lock the XArray to ensure that no list_lru_memcg allocation
		 * is in flight and that any further allocation will see
		 * css_is_dying().
		 */
		xas_lock_irq(&xas);
		mlru = xas_store(&xas, NULL);
		xas_unlock_irq(&xas);
		if (!mlru)
			continue;

		/*
		 * With the XArray entry set to NULL, holding the lru lock
		 * below prevents list_lru_{add,del,isolate} from touching
		 * the lru, so it is safe to reparent.
		 */
		for_each_node(i)
			memcg_reparent_list_lru_one(lru, i, &mlru->node[i], parent);

		/*
		 * Here all list_lrus corresponding to the cgroup are
		 * guaranteed to remain empty, so this one can be freed
		 * safely; any further memcg_list_lru_alloc() call will
		 * simply bail out.
		 */
		kvfree_rcu(mlru, rcu);
	}
	mutex_unlock(&list_lrus_mutex);
}

static inline bool memcg_list_lru_allocated(struct mem_cgroup *memcg,
					    struct list_lru *lru)
{
	int idx = memcg->kmemcg_id;

	return idx < 0 || xa_load(&lru->xa, idx);
}

int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
			 gfp_t gfp)
{
	unsigned long flags;
	struct list_lru_memcg *mlru;
	struct mem_cgroup *pos, *parent;
	XA_STATE(xas, &lru->xa, 0);

	if (!list_lru_memcg_aware(lru) || memcg_list_lru_allocated(memcg, lru))
		return 0;

	gfp &= GFP_RECLAIM_MASK;
	/*
	 * Because the list_lru can be reparented to the parent cgroup's
	 * list_lru, we must make sure that this cgroup and all its
	 * ancestors have allocated a list_lru_memcg.
	 */
	do {
		/*
		 * Find the farthest ancestor that has not been populated
		 * yet; repeat until memcg itself has been populated.
		 */
		pos = memcg;
		parent = parent_mem_cgroup(pos);
		while (!memcg_list_lru_allocated(parent, lru)) {
			pos = parent;
			parent = parent_mem_cgroup(pos);
		}

		mlru = memcg_init_list_lru_one(lru, gfp);
		if (!mlru)
			return -ENOMEM;
		xas_set(&xas, pos->kmemcg_id);
		do {
			xas_lock_irqsave(&xas, flags);
			if (!xas_load(&xas) && !css_is_dying(&pos->css)) {
				xas_store(&xas, mlru);
				if (!xas_error(&xas))
					mlru = NULL;
			}
			xas_unlock_irqrestore(&xas, flags);
		} while (xas_nomem(&xas, gfp));
		if (mlru)
			kfree(mlru);
	} while (pos != memcg && !css_is_dying(&pos->css));

	return xas_error(&xas);
}
#else
static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG */

int __list_lru_init(struct list_lru *lru, bool memcg_aware, struct shrinker *shrinker)
{
	int i;

#ifdef CONFIG_MEMCG
	if (shrinker)
		lru->shrinker_id = shrinker->id;
	else
		lru->shrinker_id = -1;

	if (mem_cgroup_kmem_disabled())
		memcg_aware = false;
#endif

	lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
	if (!lru->node)
		return -ENOMEM;

	for_each_node(i)
		init_one_lru(lru, &lru->node[i].lru);

	memcg_init_list_lru(lru, memcg_aware);
	list_lru_register(lru);

	return 0;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

void list_lru_destroy(struct list_lru *lru)
{
	/* Already destroyed or not yet initialized? */
	if (!lru->node)
		return;

	list_lru_unregister(lru);

	memcg_destroy_list_lru(lru);
	kfree(lru->node);
	lru->node = NULL;

#ifdef CONFIG_MEMCG
	lru->shrinker_id = -1;
#endif
}
EXPORT_SYMBOL_GPL(list_lru_destroy);