1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved. 4 * Authors: David Chinner and Glauber Costa 5 * 6 * Generic LRU infrastructure 7 */ 8 #include <linux/kernel.h> 9 #include <linux/module.h> 10 #include <linux/mm.h> 11 #include <linux/list_lru.h> 12 #include <linux/slab.h> 13 #include <linux/mutex.h> 14 #include <linux/memcontrol.h> 15 #include "slab.h" 16 #include "internal.h" 17 18 #ifdef CONFIG_MEMCG 19 static LIST_HEAD(memcg_list_lrus); 20 static DEFINE_MUTEX(list_lrus_mutex); 21 22 static inline bool list_lru_memcg_aware(struct list_lru *lru) 23 { 24 return lru->memcg_aware; 25 } 26 27 static void list_lru_register(struct list_lru *lru) 28 { 29 if (!list_lru_memcg_aware(lru)) 30 return; 31 32 mutex_lock(&list_lrus_mutex); 33 list_add(&lru->list, &memcg_list_lrus); 34 mutex_unlock(&list_lrus_mutex); 35 } 36 37 static void list_lru_unregister(struct list_lru *lru) 38 { 39 if (!list_lru_memcg_aware(lru)) 40 return; 41 42 mutex_lock(&list_lrus_mutex); 43 list_del(&lru->list); 44 mutex_unlock(&list_lrus_mutex); 45 } 46 47 static int lru_shrinker_id(struct list_lru *lru) 48 { 49 return lru->shrinker_id; 50 } 51 52 static inline struct list_lru_one * 53 list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx) 54 { 55 if (list_lru_memcg_aware(lru) && idx >= 0) { 56 struct list_lru_memcg *mlru = xa_load(&lru->xa, idx); 57 58 return mlru ? &mlru->node[nid] : NULL; 59 } 60 return &lru->node[nid].lru; 61 } 62 #else 63 static void list_lru_register(struct list_lru *lru) 64 { 65 } 66 67 static void list_lru_unregister(struct list_lru *lru) 68 { 69 } 70 71 static int lru_shrinker_id(struct list_lru *lru) 72 { 73 return -1; 74 } 75 76 static inline bool list_lru_memcg_aware(struct list_lru *lru) 77 { 78 return false; 79 } 80 81 static inline struct list_lru_one * 82 list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx) 83 { 84 return &lru->node[nid].lru; 85 } 86 #endif /* CONFIG_MEMCG */ 87 88 bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid, 89 struct mem_cgroup *memcg) 90 { 91 struct list_lru_node *nlru = &lru->node[nid]; 92 struct list_lru_one *l; 93 94 spin_lock(&nlru->lock); 95 if (list_empty(item)) { 96 l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg)); 97 list_add_tail(item, &l->list); 98 /* Set shrinker bit if the first element was added */ 99 if (!l->nr_items++) 100 set_shrinker_bit(memcg, nid, lru_shrinker_id(lru)); 101 nlru->nr_items++; 102 spin_unlock(&nlru->lock); 103 return true; 104 } 105 spin_unlock(&nlru->lock); 106 return false; 107 } 108 EXPORT_SYMBOL_GPL(list_lru_add); 109 110 bool list_lru_add_obj(struct list_lru *lru, struct list_head *item) 111 { 112 int nid = page_to_nid(virt_to_page(item)); 113 struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ? 114 mem_cgroup_from_slab_obj(item) : NULL; 115 116 return list_lru_add(lru, item, nid, memcg); 117 } 118 EXPORT_SYMBOL_GPL(list_lru_add_obj); 119 120 bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid, 121 struct mem_cgroup *memcg) 122 { 123 struct list_lru_node *nlru = &lru->node[nid]; 124 struct list_lru_one *l; 125 126 spin_lock(&nlru->lock); 127 if (!list_empty(item)) { 128 l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg)); 129 list_del_init(item); 130 l->nr_items--; 131 nlru->nr_items--; 132 spin_unlock(&nlru->lock); 133 return true; 134 } 135 spin_unlock(&nlru->lock); 136 return false; 137 } 138 EXPORT_SYMBOL_GPL(list_lru_del); 139 140 bool list_lru_del_obj(struct list_lru *lru, struct list_head *item) 141 { 142 int nid = page_to_nid(virt_to_page(item)); 143 struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ? 144 mem_cgroup_from_slab_obj(item) : NULL; 145 146 return list_lru_del(lru, item, nid, memcg); 147 } 148 EXPORT_SYMBOL_GPL(list_lru_del_obj); 149 150 void list_lru_isolate(struct list_lru_one *list, struct list_head *item) 151 { 152 list_del_init(item); 153 list->nr_items--; 154 } 155 EXPORT_SYMBOL_GPL(list_lru_isolate); 156 157 void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item, 158 struct list_head *head) 159 { 160 list_move(item, head); 161 list->nr_items--; 162 } 163 EXPORT_SYMBOL_GPL(list_lru_isolate_move); 164 165 unsigned long list_lru_count_one(struct list_lru *lru, 166 int nid, struct mem_cgroup *memcg) 167 { 168 struct list_lru_one *l; 169 long count; 170 171 rcu_read_lock(); 172 l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg)); 173 count = l ? READ_ONCE(l->nr_items) : 0; 174 rcu_read_unlock(); 175 176 if (unlikely(count < 0)) 177 count = 0; 178 179 return count; 180 } 181 EXPORT_SYMBOL_GPL(list_lru_count_one); 182 183 unsigned long list_lru_count_node(struct list_lru *lru, int nid) 184 { 185 struct list_lru_node *nlru; 186 187 nlru = &lru->node[nid]; 188 return nlru->nr_items; 189 } 190 EXPORT_SYMBOL_GPL(list_lru_count_node); 191 192 static unsigned long 193 __list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx, 194 list_lru_walk_cb isolate, void *cb_arg, 195 unsigned long *nr_to_walk) 196 { 197 struct list_lru_node *nlru = &lru->node[nid]; 198 struct list_lru_one *l; 199 struct list_head *item, *n; 200 unsigned long isolated = 0; 201 202 restart: 203 l = list_lru_from_memcg_idx(lru, nid, memcg_idx); 204 if (!l) 205 goto out; 206 207 list_for_each_safe(item, n, &l->list) { 208 enum lru_status ret; 209 210 /* 211 * decrement nr_to_walk first so that we don't livelock if we 212 * get stuck on large numbers of LRU_RETRY items 213 */ 214 if (!*nr_to_walk) 215 break; 216 --*nr_to_walk; 217 218 ret = isolate(item, l, &nlru->lock, cb_arg); 219 switch (ret) { 220 case LRU_REMOVED_RETRY: 221 assert_spin_locked(&nlru->lock); 222 fallthrough; 223 case LRU_REMOVED: 224 isolated++; 225 nlru->nr_items--; 226 /* 227 * If the lru lock has been dropped, our list 228 * traversal is now invalid and so we have to 229 * restart from scratch. 230 */ 231 if (ret == LRU_REMOVED_RETRY) 232 goto restart; 233 break; 234 case LRU_ROTATE: 235 list_move_tail(item, &l->list); 236 break; 237 case LRU_SKIP: 238 break; 239 case LRU_RETRY: 240 /* 241 * The lru lock has been dropped, our list traversal is 242 * now invalid and so we have to restart from scratch. 243 */ 244 assert_spin_locked(&nlru->lock); 245 goto restart; 246 case LRU_STOP: 247 assert_spin_locked(&nlru->lock); 248 goto out; 249 default: 250 BUG(); 251 } 252 } 253 out: 254 return isolated; 255 } 256 257 unsigned long 258 list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg, 259 list_lru_walk_cb isolate, void *cb_arg, 260 unsigned long *nr_to_walk) 261 { 262 struct list_lru_node *nlru = &lru->node[nid]; 263 unsigned long ret; 264 265 spin_lock(&nlru->lock); 266 ret = __list_lru_walk_one(lru, nid, memcg_kmem_id(memcg), isolate, 267 cb_arg, nr_to_walk); 268 spin_unlock(&nlru->lock); 269 return ret; 270 } 271 EXPORT_SYMBOL_GPL(list_lru_walk_one); 272 273 unsigned long 274 list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg, 275 list_lru_walk_cb isolate, void *cb_arg, 276 unsigned long *nr_to_walk) 277 { 278 struct list_lru_node *nlru = &lru->node[nid]; 279 unsigned long ret; 280 281 spin_lock_irq(&nlru->lock); 282 ret = __list_lru_walk_one(lru, nid, memcg_kmem_id(memcg), isolate, 283 cb_arg, nr_to_walk); 284 spin_unlock_irq(&nlru->lock); 285 return ret; 286 } 287 288 unsigned long list_lru_walk_node(struct list_lru *lru, int nid, 289 list_lru_walk_cb isolate, void *cb_arg, 290 unsigned long *nr_to_walk) 291 { 292 long isolated = 0; 293 294 isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg, 295 nr_to_walk); 296 297 #ifdef CONFIG_MEMCG 298 if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) { 299 struct list_lru_memcg *mlru; 300 unsigned long index; 301 302 xa_for_each(&lru->xa, index, mlru) { 303 struct list_lru_node *nlru = &lru->node[nid]; 304 305 spin_lock(&nlru->lock); 306 isolated += __list_lru_walk_one(lru, nid, index, 307 isolate, cb_arg, 308 nr_to_walk); 309 spin_unlock(&nlru->lock); 310 311 if (*nr_to_walk <= 0) 312 break; 313 } 314 } 315 #endif 316 317 return isolated; 318 } 319 EXPORT_SYMBOL_GPL(list_lru_walk_node); 320 321 static void init_one_lru(struct list_lru_one *l) 322 { 323 INIT_LIST_HEAD(&l->list); 324 l->nr_items = 0; 325 } 326 327 #ifdef CONFIG_MEMCG 328 static struct list_lru_memcg *memcg_init_list_lru_one(gfp_t gfp) 329 { 330 int nid; 331 struct list_lru_memcg *mlru; 332 333 mlru = kmalloc(struct_size(mlru, node, nr_node_ids), gfp); 334 if (!mlru) 335 return NULL; 336 337 for_each_node(nid) 338 init_one_lru(&mlru->node[nid]); 339 340 return mlru; 341 } 342 343 static void memcg_list_lru_free(struct list_lru *lru, int src_idx) 344 { 345 struct list_lru_memcg *mlru = xa_erase_irq(&lru->xa, src_idx); 346 347 /* 348 * The __list_lru_walk_one() can walk the list of this node. 349 * We need kvfree_rcu() here. And the walking of the list 350 * is under lru->node[nid]->lock, which can serve as a RCU 351 * read-side critical section. 352 */ 353 if (mlru) 354 kvfree_rcu(mlru, rcu); 355 } 356 357 static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware) 358 { 359 if (memcg_aware) 360 xa_init_flags(&lru->xa, XA_FLAGS_LOCK_IRQ); 361 lru->memcg_aware = memcg_aware; 362 } 363 364 static void memcg_destroy_list_lru(struct list_lru *lru) 365 { 366 XA_STATE(xas, &lru->xa, 0); 367 struct list_lru_memcg *mlru; 368 369 if (!list_lru_memcg_aware(lru)) 370 return; 371 372 xas_lock_irq(&xas); 373 xas_for_each(&xas, mlru, ULONG_MAX) { 374 kfree(mlru); 375 xas_store(&xas, NULL); 376 } 377 xas_unlock_irq(&xas); 378 } 379 380 static void memcg_reparent_list_lru_node(struct list_lru *lru, int nid, 381 int src_idx, struct mem_cgroup *dst_memcg) 382 { 383 struct list_lru_node *nlru = &lru->node[nid]; 384 int dst_idx = dst_memcg->kmemcg_id; 385 struct list_lru_one *src, *dst; 386 387 /* 388 * Since list_lru_{add,del} may be called under an IRQ-safe lock, 389 * we have to use IRQ-safe primitives here to avoid deadlock. 390 */ 391 spin_lock_irq(&nlru->lock); 392 393 src = list_lru_from_memcg_idx(lru, nid, src_idx); 394 if (!src) 395 goto out; 396 dst = list_lru_from_memcg_idx(lru, nid, dst_idx); 397 398 list_splice_init(&src->list, &dst->list); 399 400 if (src->nr_items) { 401 dst->nr_items += src->nr_items; 402 set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru)); 403 src->nr_items = 0; 404 } 405 out: 406 spin_unlock_irq(&nlru->lock); 407 } 408 409 static void memcg_reparent_list_lru(struct list_lru *lru, 410 int src_idx, struct mem_cgroup *dst_memcg) 411 { 412 int i; 413 414 for_each_node(i) 415 memcg_reparent_list_lru_node(lru, i, src_idx, dst_memcg); 416 417 memcg_list_lru_free(lru, src_idx); 418 } 419 420 void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent) 421 { 422 struct cgroup_subsys_state *css; 423 struct list_lru *lru; 424 int src_idx = memcg->kmemcg_id; 425 426 /* 427 * Change kmemcg_id of this cgroup and all its descendants to the 428 * parent's id, and then move all entries from this cgroup's list_lrus 429 * to ones of the parent. 430 * 431 * After we have finished, all list_lrus corresponding to this cgroup 432 * are guaranteed to remain empty. So we can safely free this cgroup's 433 * list lrus in memcg_list_lru_free(). 434 * 435 * Changing ->kmemcg_id to the parent can prevent memcg_list_lru_alloc() 436 * from allocating list lrus for this cgroup after memcg_list_lru_free() 437 * call. 438 */ 439 rcu_read_lock(); 440 css_for_each_descendant_pre(css, &memcg->css) { 441 struct mem_cgroup *child; 442 443 child = mem_cgroup_from_css(css); 444 WRITE_ONCE(child->kmemcg_id, parent->kmemcg_id); 445 } 446 rcu_read_unlock(); 447 448 mutex_lock(&list_lrus_mutex); 449 list_for_each_entry(lru, &memcg_list_lrus, list) 450 memcg_reparent_list_lru(lru, src_idx, parent); 451 mutex_unlock(&list_lrus_mutex); 452 } 453 454 static inline bool memcg_list_lru_allocated(struct mem_cgroup *memcg, 455 struct list_lru *lru) 456 { 457 int idx = memcg->kmemcg_id; 458 459 return idx < 0 || xa_load(&lru->xa, idx); 460 } 461 462 int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru, 463 gfp_t gfp) 464 { 465 int i; 466 unsigned long flags; 467 struct list_lru_memcg_table { 468 struct list_lru_memcg *mlru; 469 struct mem_cgroup *memcg; 470 } *table; 471 XA_STATE(xas, &lru->xa, 0); 472 473 if (!list_lru_memcg_aware(lru) || memcg_list_lru_allocated(memcg, lru)) 474 return 0; 475 476 gfp &= GFP_RECLAIM_MASK; 477 table = kmalloc_array(memcg->css.cgroup->level, sizeof(*table), gfp); 478 if (!table) 479 return -ENOMEM; 480 481 /* 482 * Because the list_lru can be reparented to the parent cgroup's 483 * list_lru, we should make sure that this cgroup and all its 484 * ancestors have allocated list_lru_memcg. 485 */ 486 for (i = 0; memcg; memcg = parent_mem_cgroup(memcg), i++) { 487 if (memcg_list_lru_allocated(memcg, lru)) 488 break; 489 490 table[i].memcg = memcg; 491 table[i].mlru = memcg_init_list_lru_one(gfp); 492 if (!table[i].mlru) { 493 while (i--) 494 kfree(table[i].mlru); 495 kfree(table); 496 return -ENOMEM; 497 } 498 } 499 500 xas_lock_irqsave(&xas, flags); 501 while (i--) { 502 int index = READ_ONCE(table[i].memcg->kmemcg_id); 503 struct list_lru_memcg *mlru = table[i].mlru; 504 505 xas_set(&xas, index); 506 retry: 507 if (unlikely(index < 0 || xas_error(&xas) || xas_load(&xas))) { 508 kfree(mlru); 509 } else { 510 xas_store(&xas, mlru); 511 if (xas_error(&xas) == -ENOMEM) { 512 xas_unlock_irqrestore(&xas, flags); 513 if (xas_nomem(&xas, gfp)) 514 xas_set_err(&xas, 0); 515 xas_lock_irqsave(&xas, flags); 516 /* 517 * The xas lock has been released, this memcg 518 * can be reparented before us. So reload 519 * memcg id. More details see the comments 520 * in memcg_reparent_list_lrus(). 521 */ 522 index = READ_ONCE(table[i].memcg->kmemcg_id); 523 if (index < 0) 524 xas_set_err(&xas, 0); 525 else if (!xas_error(&xas) && index != xas.xa_index) 526 xas_set(&xas, index); 527 goto retry; 528 } 529 } 530 } 531 /* xas_nomem() is used to free memory instead of memory allocation. */ 532 if (xas.xa_alloc) 533 xas_nomem(&xas, gfp); 534 xas_unlock_irqrestore(&xas, flags); 535 kfree(table); 536 537 return xas_error(&xas); 538 } 539 #else 540 static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware) 541 { 542 } 543 544 static void memcg_destroy_list_lru(struct list_lru *lru) 545 { 546 } 547 #endif /* CONFIG_MEMCG */ 548 549 int __list_lru_init(struct list_lru *lru, bool memcg_aware, 550 struct lock_class_key *key, struct shrinker *shrinker) 551 { 552 int i; 553 554 #ifdef CONFIG_MEMCG 555 if (shrinker) 556 lru->shrinker_id = shrinker->id; 557 else 558 lru->shrinker_id = -1; 559 560 if (mem_cgroup_kmem_disabled()) 561 memcg_aware = false; 562 #endif 563 564 lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL); 565 if (!lru->node) 566 return -ENOMEM; 567 568 for_each_node(i) { 569 spin_lock_init(&lru->node[i].lock); 570 if (key) 571 lockdep_set_class(&lru->node[i].lock, key); 572 init_one_lru(&lru->node[i].lru); 573 } 574 575 memcg_init_list_lru(lru, memcg_aware); 576 list_lru_register(lru); 577 578 return 0; 579 } 580 EXPORT_SYMBOL_GPL(__list_lru_init); 581 582 void list_lru_destroy(struct list_lru *lru) 583 { 584 /* Already destroyed or not yet initialized? */ 585 if (!lru->node) 586 return; 587 588 list_lru_unregister(lru); 589 590 memcg_destroy_list_lru(lru); 591 kfree(lru->node); 592 lru->node = NULL; 593 594 #ifdef CONFIG_MEMCG 595 lru->shrinker_id = -1; 596 #endif 597 } 598 EXPORT_SYMBOL_GPL(list_lru_destroy); 599