// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>
#include "slab.h"
#include "internal.h"

#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(memcg_list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return lru->memcg_aware;
}

static void list_lru_register(struct list_lru *lru)
{
	if (!list_lru_memcg_aware(lru))
		return;

	mutex_lock(&list_lrus_mutex);
	list_add(&lru->list, &memcg_list_lrus);
	mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
	if (!list_lru_memcg_aware(lru))
		return;

	mutex_lock(&list_lrus_mutex);
	list_del(&lru->list);
	mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return lru->shrinker_id;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
	if (list_lru_memcg_aware(lru) && idx >= 0) {
		struct list_lru_memcg *mlru = xa_load(&lru->xa, idx);

		return mlru ? &mlru->node[nid] : NULL;
	}
	return &lru->node[nid].lru;
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
	return &lru->node[nid].lru;
}
#endif /* CONFIG_MEMCG_KMEM */

bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
		  struct mem_cgroup *memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (list_empty(item)) {
		l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
		list_add_tail(item, &l->list);
		/* Set shrinker bit if the first element was added */
		if (!l->nr_items++)
			set_shrinker_bit(memcg, nid, lru_shrinker_id(lru));
		nlru->nr_items++;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

bool list_lru_add_obj(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ?
		mem_cgroup_from_slab_obj(item) : NULL;

	return list_lru_add(lru, item, nid, memcg);
}
EXPORT_SYMBOL_GPL(list_lru_add_obj);

bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid,
		  struct mem_cgroup *memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (!list_empty(item)) {
		l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
		list_del_init(item);
		l->nr_items--;
		nlru->nr_items--;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);

bool list_lru_del_obj(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ?
		mem_cgroup_from_slab_obj(item) : NULL;

	return list_lru_del(lru, item, nid, memcg);
}
EXPORT_SYMBOL_GPL(list_lru_del_obj);
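
/*
 * Usage sketch for the add/del primitives above (the demo_* names are
 * hypothetical, not part of this API). Objects embed a list_head that must
 * be initialised (INIT_LIST_HEAD()) before first use, because list_lru_add()
 * keys off list_empty(). The *_obj variants derive the node and memcg from
 * the object's address, so the object is assumed to be slab-allocated:
 *
 *	struct demo_object {
 *		struct list_head lru;	// linked into demo_lru when unused
 *		void *payload;
 *	};
 *
 *	static struct list_lru demo_lru;	// assumed initialised elsewhere
 *
 *	static void demo_object_make_unused(struct demo_object *obj)
 *	{
 *		// No-op if already on the list; the first insertion for a
 *		// given memcg/node also sets the shrinker bit.
 *		list_lru_add_obj(&demo_lru, &obj->lru);
 *	}
 *
 *	static void demo_object_make_used(struct demo_object *obj)
 *	{
 *		// Returns false (a no-op) if reclaim already isolated it.
 *		list_lru_del_obj(&demo_lru, &obj->lru);
 *	}
 */
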
void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
	list_del_init(item);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head)
{
	list_move(item, head);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

void list_lru_putback(struct list_lru *lru, struct list_head *item, int nid,
		      struct mem_cgroup *memcg)
{
	struct list_lru_one *list =
		list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));

	if (list_empty(item)) {
		list_add_tail(item, &list->list);
		if (!list->nr_items++)
			set_shrinker_bit(memcg, nid, lru_shrinker_id(lru));
	}
}
EXPORT_SYMBOL_GPL(list_lru_putback);

unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	struct list_lru_one *l;
	long count;

	rcu_read_lock();
	l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
	count = l ? READ_ONCE(l->nr_items) : 0;
	rcu_read_unlock();

	if (unlikely(count < 0))
		count = 0;

	return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
	struct list_lru_node *nlru;

	nlru = &lru->node[nid];
	return nlru->nr_items;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);
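
/*
 * The count helpers above typically back a shrinker's ->count_objects().
 * A minimal sketch, assuming a hypothetical demo_lru and using the
 * list_lru_shrink_count() wrapper from <linux/list_lru.h>, which passes
 * sc->nid and sc->memcg down to list_lru_count_one():
 *
 *	static unsigned long demo_shrink_count(struct shrinker *shrink,
 *					       struct shrink_control *sc)
 *	{
 *		return list_lru_shrink_count(&demo_lru, sc);
 *	}
 */
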
static unsigned long
__list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
		    list_lru_walk_cb isolate, void *cb_arg,
		    unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	struct list_head *item, *n;
	unsigned long isolated = 0;

restart:
	l = list_lru_from_memcg_idx(lru, nid, memcg_idx);
	if (!l)
		goto out;

	list_for_each_safe(item, n, &l->list) {
		enum lru_status ret;

		/*
		 * decrement nr_to_walk first so that we don't livelock if we
		 * get stuck on large numbers of LRU_RETRY items
		 */
		if (!*nr_to_walk)
			break;
		--*nr_to_walk;

		ret = isolate(item, l, &nlru->lock, cb_arg);
		switch (ret) {
		case LRU_REMOVED_RETRY:
			assert_spin_locked(&nlru->lock);
			fallthrough;
		case LRU_REMOVED:
			isolated++;
			nlru->nr_items--;
			/*
			 * If the lru lock has been dropped, our list
			 * traversal is now invalid and so we have to
			 * restart from scratch.
			 */
			if (ret == LRU_REMOVED_RETRY)
				goto restart;
			break;
		case LRU_ROTATE:
			list_move_tail(item, &l->list);
			break;
		case LRU_SKIP:
			break;
		case LRU_RETRY:
			/*
			 * The lru lock has been dropped, our list traversal is
			 * now invalid and so we have to restart from scratch.
			 */
			assert_spin_locked(&nlru->lock);
			goto restart;
		default:
			BUG();
		}
	}
out:
	return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		  list_lru_walk_cb isolate, void *cb_arg,
		  unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	unsigned long ret;

	spin_lock(&nlru->lock);
	ret = __list_lru_walk_one(lru, nid, memcg_kmem_id(memcg), isolate,
				  cb_arg, nr_to_walk);
	spin_unlock(&nlru->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

unsigned long
list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		      list_lru_walk_cb isolate, void *cb_arg,
		      unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	unsigned long ret;

	spin_lock_irq(&nlru->lock);
	ret = __list_lru_walk_one(lru, nid, memcg_kmem_id(memcg), isolate,
				  cb_arg, nr_to_walk);
	spin_unlock_irq(&nlru->lock);
	return ret;
}

unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
				 list_lru_walk_cb isolate, void *cb_arg,
				 unsigned long *nr_to_walk)
{
	long isolated = 0;

	isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
				      nr_to_walk);

#ifdef CONFIG_MEMCG_KMEM
	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
		struct list_lru_memcg *mlru;
		unsigned long index;

		xa_for_each(&lru->xa, index, mlru) {
			struct list_lru_node *nlru = &lru->node[nid];

			spin_lock(&nlru->lock);
			isolated += __list_lru_walk_one(lru, nid, index,
							isolate, cb_arg,
							nr_to_walk);
			spin_unlock(&nlru->lock);

			if (*nr_to_walk <= 0)
				break;
		}
	}
#endif

	return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);
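
/*
 * Walk callbacks follow the pattern below (a sketch modelled on typical
 * users; the demo_* names are hypothetical). The callback runs with the
 * per-node lru lock held; it may only drop that lock when returning
 * LRU_RETRY or LRU_REMOVED_RETRY, and must reacquire it before returning,
 * which is what the assert_spin_locked() calls in __list_lru_walk_one()
 * enforce:
 *
 *	static enum lru_status demo_isolate(struct list_head *item,
 *			struct list_lru_one *list, spinlock_t *lru_lock,
 *			void *cb_arg)
 *	{
 *		struct list_head *dispose = cb_arg;
 *		struct demo_object *obj =
 *			container_of(item, struct demo_object, lru);
 *
 *		if (demo_object_is_busy(obj))
 *			return LRU_ROTATE;	// keep it, move to list tail
 *
 *		// Detach under the lru lock and collect for later freeing.
 *		list_lru_isolate_move(list, item, dispose);
 *		return LRU_REMOVED;
 *	}
 *
 *	static unsigned long demo_shrink_scan(struct shrinker *shrink,
 *					      struct shrink_control *sc)
 *	{
 *		LIST_HEAD(dispose);
 *		unsigned long freed;
 *
 *		// list_lru_shrink_walk() (<linux/list_lru.h>) walks the
 *		// node/memcg selected by sc, capped at sc->nr_to_scan.
 *		freed = list_lru_shrink_walk(&demo_lru, sc, demo_isolate,
 *					     &dispose);
 *		demo_dispose_list(&dispose);
 *		return freed;
 *	}
 */
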
static void init_one_lru(struct list_lru_one *l)
{
	INIT_LIST_HEAD(&l->list);
	l->nr_items = 0;
}

#ifdef CONFIG_MEMCG_KMEM
static struct list_lru_memcg *memcg_init_list_lru_one(gfp_t gfp)
{
	int nid;
	struct list_lru_memcg *mlru;

	mlru = kmalloc(struct_size(mlru, node, nr_node_ids), gfp);
	if (!mlru)
		return NULL;

	for_each_node(nid)
		init_one_lru(&mlru->node[nid]);

	return mlru;
}

static void memcg_list_lru_free(struct list_lru *lru, int src_idx)
{
	struct list_lru_memcg *mlru = xa_erase_irq(&lru->xa, src_idx);

	/*
	 * __list_lru_walk_one() may still be walking the lists of this
	 * entry, so it must be freed with kvfree_rcu(). The walk runs
	 * under lru->node[nid].lock, which serves as an RCU read-side
	 * critical section.
	 */
	if (mlru)
		kvfree_rcu(mlru, rcu);
}

static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	if (memcg_aware)
		xa_init_flags(&lru->xa, XA_FLAGS_LOCK_IRQ);
	lru->memcg_aware = memcg_aware;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
	XA_STATE(xas, &lru->xa, 0);
	struct list_lru_memcg *mlru;

	if (!list_lru_memcg_aware(lru))
		return;

	xas_lock_irq(&xas);
	xas_for_each(&xas, mlru, ULONG_MAX) {
		kfree(mlru);
		xas_store(&xas, NULL);
	}
	xas_unlock_irq(&xas);
}

static void memcg_reparent_list_lru_node(struct list_lru *lru, int nid,
					 int src_idx, struct mem_cgroup *dst_memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	int dst_idx = dst_memcg->kmemcg_id;
	struct list_lru_one *src, *dst;

	/*
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);

	src = list_lru_from_memcg_idx(lru, nid, src_idx);
	if (!src)
		goto out;
	dst = list_lru_from_memcg_idx(lru, nid, dst_idx);

	list_splice_init(&src->list, &dst->list);

	if (src->nr_items) {
		dst->nr_items += src->nr_items;
		set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
		src->nr_items = 0;
	}
out:
	spin_unlock_irq(&nlru->lock);
}

static void memcg_reparent_list_lru(struct list_lru *lru,
				    int src_idx, struct mem_cgroup *dst_memcg)
{
	int i;

	for_each_node(i)
		memcg_reparent_list_lru_node(lru, i, src_idx, dst_memcg);

	memcg_list_lru_free(lru, src_idx);
}
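
/*
 * Observable effect of the reparenting done by memcg_reparent_list_lrus()
 * below, as a sketch: items are never lost, they are spliced onto the
 * parent's per-node list when the child memcg goes offline, so:
 *
 *	nr = list_lru_count_one(lru, nid, child);	// say N items
 *	// child is taken offline, memcg_reparent_list_lrus(child, parent)
 *	// the parent's count for this nid grows by N, and counting against
 *	// the child now resolves to the parent's list because the child's
 *	// kmemcg_id has been rewritten to the parent's.
 */
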
void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent)
{
	struct cgroup_subsys_state *css;
	struct list_lru *lru;
	int src_idx = memcg->kmemcg_id;

	/*
	 * Change kmemcg_id of this cgroup and all its descendants to the
	 * parent's id, and then move all entries from this cgroup's list_lrus
	 * to the parent's.
	 *
	 * After we have finished, all list_lrus corresponding to this cgroup
	 * are guaranteed to remain empty. So we can safely free this cgroup's
	 * list lrus in memcg_list_lru_free().
	 *
	 * Changing ->kmemcg_id to the parent's id prevents
	 * memcg_list_lru_alloc() from allocating list lrus for this cgroup
	 * after the memcg_list_lru_free() call.
	 */
	rcu_read_lock();
	css_for_each_descendant_pre(css, &memcg->css) {
		struct mem_cgroup *child;

		child = mem_cgroup_from_css(css);
		WRITE_ONCE(child->kmemcg_id, parent->kmemcg_id);
	}
	rcu_read_unlock();

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &memcg_list_lrus, list)
		memcg_reparent_list_lru(lru, src_idx, parent);
	mutex_unlock(&list_lrus_mutex);
}

static inline bool memcg_list_lru_allocated(struct mem_cgroup *memcg,
					    struct list_lru *lru)
{
	int idx = memcg->kmemcg_id;

	return idx < 0 || xa_load(&lru->xa, idx);
}

int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
			 gfp_t gfp)
{
	int i;
	unsigned long flags;
	struct list_lru_memcg_table {
		struct list_lru_memcg *mlru;
		struct mem_cgroup *memcg;
	} *table;
	XA_STATE(xas, &lru->xa, 0);

	if (!list_lru_memcg_aware(lru) || memcg_list_lru_allocated(memcg, lru))
		return 0;

	gfp &= GFP_RECLAIM_MASK;
	table = kmalloc_array(memcg->css.cgroup->level, sizeof(*table), gfp);
	if (!table)
		return -ENOMEM;

	/*
	 * Because the list_lru can be reparented to the parent cgroup's
	 * list_lru, we must make sure that this cgroup and all its
	 * ancestors have a list_lru_memcg allocated.
	 */
	for (i = 0; memcg; memcg = parent_mem_cgroup(memcg), i++) {
		if (memcg_list_lru_allocated(memcg, lru))
			break;

		table[i].memcg = memcg;
		table[i].mlru = memcg_init_list_lru_one(gfp);
		if (!table[i].mlru) {
			while (i--)
				kfree(table[i].mlru);
			kfree(table);
			return -ENOMEM;
		}
	}

	xas_lock_irqsave(&xas, flags);
	while (i--) {
		int index = READ_ONCE(table[i].memcg->kmemcg_id);
		struct list_lru_memcg *mlru = table[i].mlru;

		xas_set(&xas, index);
retry:
		if (unlikely(index < 0 || xas_error(&xas) || xas_load(&xas))) {
			kfree(mlru);
		} else {
			xas_store(&xas, mlru);
			if (xas_error(&xas) == -ENOMEM) {
				xas_unlock_irqrestore(&xas, flags);
				if (xas_nomem(&xas, gfp))
					xas_set_err(&xas, 0);
				xas_lock_irqsave(&xas, flags);
				/*
				 * The xas lock was dropped, so this memcg may
				 * have been reparented in the meantime. Reload
				 * the memcg id; see the comments in
				 * memcg_reparent_list_lrus() for details.
				 */
				index = READ_ONCE(table[i].memcg->kmemcg_id);
				if (index < 0)
					xas_set_err(&xas, 0);
				else if (!xas_error(&xas) && index != xas.xa_index)
					xas_set(&xas, index);
				goto retry;
			}
		}
	}
	/*
	 * Here xas_nomem() is used to free the preallocated memory rather
	 * than to allocate more.
	 */
	if (xas.xa_alloc)
		xas_nomem(&xas, gfp);
	xas_unlock_irqrestore(&xas, flags);
	kfree(table);

	return xas_error(&xas);
}
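
/*
 * Callers normally do not invoke memcg_list_lru_alloc() themselves. A
 * sketch of how the per-memcg structures are expected to get populated
 * (demo_* names hypothetical, cache assumed created with SLAB_ACCOUNT):
 * passing the lru to kmem_cache_alloc_lru() lets the slab/memcg accounting
 * path allocate the matching list_lru_memcg before the object is ever
 * added to the lru:
 *
 *	obj = kmem_cache_alloc_lru(demo_cache, &demo_lru, GFP_KERNEL);
 *	if (!obj)
 *		return -ENOMEM;
 *	...
 *	list_lru_add_obj(&demo_lru, &obj->lru);
 */
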
#else
static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

int __list_lru_init(struct list_lru *lru, bool memcg_aware,
		    struct lock_class_key *key, struct shrinker *shrinker)
{
	int i;

#ifdef CONFIG_MEMCG_KMEM
	if (shrinker)
		lru->shrinker_id = shrinker->id;
	else
		lru->shrinker_id = -1;
#endif

	lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
	if (!lru->node)
		return -ENOMEM;

	for_each_node(i) {
		spin_lock_init(&lru->node[i].lock);
		if (key)
			lockdep_set_class(&lru->node[i].lock, key);
		init_one_lru(&lru->node[i].lru);
	}

	memcg_init_list_lru(lru, memcg_aware);
	list_lru_register(lru);

	return 0;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

void list_lru_destroy(struct list_lru *lru)
{
	/* Already destroyed or not yet initialized? */
	if (!lru->node)
		return;

	list_lru_unregister(lru);

	memcg_destroy_list_lru(lru);
	kfree(lru->node);
	lru->node = NULL;

#ifdef CONFIG_MEMCG_KMEM
	lru->shrinker_id = -1;
#endif
}
EXPORT_SYMBOL_GPL(list_lru_destroy);
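
/*
 * Lifecycle sketch for the init/destroy pair above, using the wrapper
 * macros from <linux/list_lru.h> (demo_* names hypothetical; the shrinker
 * is assumed to have been set up separately):
 *
 *	static struct list_lru demo_lru;
 *
 *	static int demo_init(struct shrinker *demo_shrinker)
 *	{
 *		// Equivalent to __list_lru_init(&demo_lru, true, NULL,
 *		// demo_shrinker): memcg aware, default lock class. Use
 *		// list_lru_init() for a plain, NUMA-only lru.
 *		return list_lru_init_memcg(&demo_lru, demo_shrinker);
 *	}
 *
 *	static void demo_exit(void)
 *	{
 *		// Objects must no longer be reachable through the lru at
 *		// this point; destroy frees the per-node arrays and, for
 *		// memcg-aware lrus, the per-memcg xarray entries.
 *		list_lru_destroy(&demo_lru);
 *	}
 */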