// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>
#include "slab.h"
#include "internal.h"

#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(memcg_list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
        return lru->memcg_aware;
}

static void list_lru_register(struct list_lru *lru)
{
        if (!list_lru_memcg_aware(lru))
                return;

        mutex_lock(&list_lrus_mutex);
        list_add(&lru->list, &memcg_list_lrus);
        mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
        if (!list_lru_memcg_aware(lru))
                return;

        mutex_lock(&list_lrus_mutex);
        list_del(&lru->list);
        mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
        return lru->shrinker_id;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
        if (list_lru_memcg_aware(lru) && idx >= 0) {
                struct list_lru_memcg *mlru = xa_load(&lru->xa, idx);

                return mlru ? &mlru->node[nid] : NULL;
        }
        return &lru->node[nid].lru;
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
        return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
        return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
        return &lru->node[nid].lru;
}
#endif /* CONFIG_MEMCG_KMEM */

bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
                  struct mem_cgroup *memcg)
{
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l;

        spin_lock(&nlru->lock);
        if (list_empty(item)) {
                l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
                list_add_tail(item, &l->list);
                /* Set shrinker bit if the first element was added */
                if (!l->nr_items++)
                        set_shrinker_bit(memcg, nid, lru_shrinker_id(lru));
                nlru->nr_items++;
                spin_unlock(&nlru->lock);
                return true;
        }
        spin_unlock(&nlru->lock);
        return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

bool list_lru_add_obj(struct list_lru *lru, struct list_head *item)
{
        int nid = page_to_nid(virt_to_page(item));
        struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ?
                mem_cgroup_from_slab_obj(item) : NULL;

        return list_lru_add(lru, item, nid, memcg);
}
EXPORT_SYMBOL_GPL(list_lru_add_obj);

bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid,
                  struct mem_cgroup *memcg)
{
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l;

        spin_lock(&nlru->lock);
        if (!list_empty(item)) {
                l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
                list_del_init(item);
                l->nr_items--;
                nlru->nr_items--;
                spin_unlock(&nlru->lock);
                return true;
        }
        spin_unlock(&nlru->lock);
        return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);

bool list_lru_del_obj(struct list_lru *lru, struct list_head *item)
{
        int nid = page_to_nid(virt_to_page(item));
        struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ?
                mem_cgroup_from_slab_obj(item) : NULL;

        return list_lru_del(lru, item, nid, memcg);
}
EXPORT_SYMBOL_GPL(list_lru_del_obj);

void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
        list_del_init(item);
        list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
                           struct list_head *head)
{
        list_move(item, head);
        list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

unsigned long list_lru_count_one(struct list_lru *lru,
                                 int nid, struct mem_cgroup *memcg)
{
        struct list_lru_one *l;
        long count;

        rcu_read_lock();
        l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
        count = l ? READ_ONCE(l->nr_items) : 0;
        rcu_read_unlock();

        if (unlikely(count < 0))
                count = 0;

        return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
        struct list_lru_node *nlru;

        nlru = &lru->node[nid];
        return nlru->nr_items;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

static unsigned long
__list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
                    list_lru_walk_cb isolate, void *cb_arg,
                    unsigned long *nr_to_walk)
{
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l;
        struct list_head *item, *n;
        unsigned long isolated = 0;

restart:
        l = list_lru_from_memcg_idx(lru, nid, memcg_idx);
        if (!l)
                goto out;

        list_for_each_safe(item, n, &l->list) {
                enum lru_status ret;

                /*
                 * decrement nr_to_walk first so that we don't livelock if we
                 * get stuck on large numbers of LRU_RETRY items
                 */
                if (!*nr_to_walk)
                        break;
                --*nr_to_walk;

                ret = isolate(item, l, &nlru->lock, cb_arg);
                switch (ret) {
                case LRU_REMOVED_RETRY:
                        assert_spin_locked(&nlru->lock);
                        fallthrough;
                case LRU_REMOVED:
                        isolated++;
                        nlru->nr_items--;
                        /*
                         * If the lru lock has been dropped, our list
                         * traversal is now invalid and so we have to
                         * restart from scratch.
                         */
                        if (ret == LRU_REMOVED_RETRY)
                                goto restart;
                        break;
                case LRU_ROTATE:
                        list_move_tail(item, &l->list);
                        break;
                case LRU_SKIP:
                        break;
                case LRU_RETRY:
                        /*
                         * The lru lock has been dropped, our list traversal is
                         * now invalid and so we have to restart from scratch.
243 */ 244 assert_spin_locked(&nlru->lock); 245 goto restart; 246 default: 247 BUG(); 248 } 249 } 250 out: 251 return isolated; 252 } 253 254 unsigned long 255 list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg, 256 list_lru_walk_cb isolate, void *cb_arg, 257 unsigned long *nr_to_walk) 258 { 259 struct list_lru_node *nlru = &lru->node[nid]; 260 unsigned long ret; 261 262 spin_lock(&nlru->lock); 263 ret = __list_lru_walk_one(lru, nid, memcg_kmem_id(memcg), isolate, 264 cb_arg, nr_to_walk); 265 spin_unlock(&nlru->lock); 266 return ret; 267 } 268 EXPORT_SYMBOL_GPL(list_lru_walk_one); 269 270 unsigned long 271 list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg, 272 list_lru_walk_cb isolate, void *cb_arg, 273 unsigned long *nr_to_walk) 274 { 275 struct list_lru_node *nlru = &lru->node[nid]; 276 unsigned long ret; 277 278 spin_lock_irq(&nlru->lock); 279 ret = __list_lru_walk_one(lru, nid, memcg_kmem_id(memcg), isolate, 280 cb_arg, nr_to_walk); 281 spin_unlock_irq(&nlru->lock); 282 return ret; 283 } 284 285 unsigned long list_lru_walk_node(struct list_lru *lru, int nid, 286 list_lru_walk_cb isolate, void *cb_arg, 287 unsigned long *nr_to_walk) 288 { 289 long isolated = 0; 290 291 isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg, 292 nr_to_walk); 293 294 #ifdef CONFIG_MEMCG_KMEM 295 if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) { 296 struct list_lru_memcg *mlru; 297 unsigned long index; 298 299 xa_for_each(&lru->xa, index, mlru) { 300 struct list_lru_node *nlru = &lru->node[nid]; 301 302 spin_lock(&nlru->lock); 303 isolated += __list_lru_walk_one(lru, nid, index, 304 isolate, cb_arg, 305 nr_to_walk); 306 spin_unlock(&nlru->lock); 307 308 if (*nr_to_walk <= 0) 309 break; 310 } 311 } 312 #endif 313 314 return isolated; 315 } 316 EXPORT_SYMBOL_GPL(list_lru_walk_node); 317 318 static void init_one_lru(struct list_lru_one *l) 319 { 320 INIT_LIST_HEAD(&l->list); 321 l->nr_items = 0; 322 } 323 324 #ifdef CONFIG_MEMCG_KMEM 325 static struct list_lru_memcg *memcg_init_list_lru_one(gfp_t gfp) 326 { 327 int nid; 328 struct list_lru_memcg *mlru; 329 330 mlru = kmalloc(struct_size(mlru, node, nr_node_ids), gfp); 331 if (!mlru) 332 return NULL; 333 334 for_each_node(nid) 335 init_one_lru(&mlru->node[nid]); 336 337 return mlru; 338 } 339 340 static void memcg_list_lru_free(struct list_lru *lru, int src_idx) 341 { 342 struct list_lru_memcg *mlru = xa_erase_irq(&lru->xa, src_idx); 343 344 /* 345 * The __list_lru_walk_one() can walk the list of this node. 346 * We need kvfree_rcu() here. And the walking of the list 347 * is under lru->node[nid]->lock, which can serve as a RCU 348 * read-side critical section. 
349 */ 350 if (mlru) 351 kvfree_rcu(mlru, rcu); 352 } 353 354 static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware) 355 { 356 if (memcg_aware) 357 xa_init_flags(&lru->xa, XA_FLAGS_LOCK_IRQ); 358 lru->memcg_aware = memcg_aware; 359 } 360 361 static void memcg_destroy_list_lru(struct list_lru *lru) 362 { 363 XA_STATE(xas, &lru->xa, 0); 364 struct list_lru_memcg *mlru; 365 366 if (!list_lru_memcg_aware(lru)) 367 return; 368 369 xas_lock_irq(&xas); 370 xas_for_each(&xas, mlru, ULONG_MAX) { 371 kfree(mlru); 372 xas_store(&xas, NULL); 373 } 374 xas_unlock_irq(&xas); 375 } 376 377 static void memcg_reparent_list_lru_node(struct list_lru *lru, int nid, 378 int src_idx, struct mem_cgroup *dst_memcg) 379 { 380 struct list_lru_node *nlru = &lru->node[nid]; 381 int dst_idx = dst_memcg->kmemcg_id; 382 struct list_lru_one *src, *dst; 383 384 /* 385 * Since list_lru_{add,del} may be called under an IRQ-safe lock, 386 * we have to use IRQ-safe primitives here to avoid deadlock. 387 */ 388 spin_lock_irq(&nlru->lock); 389 390 src = list_lru_from_memcg_idx(lru, nid, src_idx); 391 if (!src) 392 goto out; 393 dst = list_lru_from_memcg_idx(lru, nid, dst_idx); 394 395 list_splice_init(&src->list, &dst->list); 396 397 if (src->nr_items) { 398 dst->nr_items += src->nr_items; 399 set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru)); 400 src->nr_items = 0; 401 } 402 out: 403 spin_unlock_irq(&nlru->lock); 404 } 405 406 static void memcg_reparent_list_lru(struct list_lru *lru, 407 int src_idx, struct mem_cgroup *dst_memcg) 408 { 409 int i; 410 411 for_each_node(i) 412 memcg_reparent_list_lru_node(lru, i, src_idx, dst_memcg); 413 414 memcg_list_lru_free(lru, src_idx); 415 } 416 417 void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent) 418 { 419 struct cgroup_subsys_state *css; 420 struct list_lru *lru; 421 int src_idx = memcg->kmemcg_id; 422 423 /* 424 * Change kmemcg_id of this cgroup and all its descendants to the 425 * parent's id, and then move all entries from this cgroup's list_lrus 426 * to ones of the parent. 427 * 428 * After we have finished, all list_lrus corresponding to this cgroup 429 * are guaranteed to remain empty. So we can safely free this cgroup's 430 * list lrus in memcg_list_lru_free(). 431 * 432 * Changing ->kmemcg_id to the parent can prevent memcg_list_lru_alloc() 433 * from allocating list lrus for this cgroup after memcg_list_lru_free() 434 * call. 
435 */ 436 rcu_read_lock(); 437 css_for_each_descendant_pre(css, &memcg->css) { 438 struct mem_cgroup *child; 439 440 child = mem_cgroup_from_css(css); 441 WRITE_ONCE(child->kmemcg_id, parent->kmemcg_id); 442 } 443 rcu_read_unlock(); 444 445 mutex_lock(&list_lrus_mutex); 446 list_for_each_entry(lru, &memcg_list_lrus, list) 447 memcg_reparent_list_lru(lru, src_idx, parent); 448 mutex_unlock(&list_lrus_mutex); 449 } 450 451 static inline bool memcg_list_lru_allocated(struct mem_cgroup *memcg, 452 struct list_lru *lru) 453 { 454 int idx = memcg->kmemcg_id; 455 456 return idx < 0 || xa_load(&lru->xa, idx); 457 } 458 459 int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru, 460 gfp_t gfp) 461 { 462 int i; 463 unsigned long flags; 464 struct list_lru_memcg_table { 465 struct list_lru_memcg *mlru; 466 struct mem_cgroup *memcg; 467 } *table; 468 XA_STATE(xas, &lru->xa, 0); 469 470 if (!list_lru_memcg_aware(lru) || memcg_list_lru_allocated(memcg, lru)) 471 return 0; 472 473 gfp &= GFP_RECLAIM_MASK; 474 table = kmalloc_array(memcg->css.cgroup->level, sizeof(*table), gfp); 475 if (!table) 476 return -ENOMEM; 477 478 /* 479 * Because the list_lru can be reparented to the parent cgroup's 480 * list_lru, we should make sure that this cgroup and all its 481 * ancestors have allocated list_lru_memcg. 482 */ 483 for (i = 0; memcg; memcg = parent_mem_cgroup(memcg), i++) { 484 if (memcg_list_lru_allocated(memcg, lru)) 485 break; 486 487 table[i].memcg = memcg; 488 table[i].mlru = memcg_init_list_lru_one(gfp); 489 if (!table[i].mlru) { 490 while (i--) 491 kfree(table[i].mlru); 492 kfree(table); 493 return -ENOMEM; 494 } 495 } 496 497 xas_lock_irqsave(&xas, flags); 498 while (i--) { 499 int index = READ_ONCE(table[i].memcg->kmemcg_id); 500 struct list_lru_memcg *mlru = table[i].mlru; 501 502 xas_set(&xas, index); 503 retry: 504 if (unlikely(index < 0 || xas_error(&xas) || xas_load(&xas))) { 505 kfree(mlru); 506 } else { 507 xas_store(&xas, mlru); 508 if (xas_error(&xas) == -ENOMEM) { 509 xas_unlock_irqrestore(&xas, flags); 510 if (xas_nomem(&xas, gfp)) 511 xas_set_err(&xas, 0); 512 xas_lock_irqsave(&xas, flags); 513 /* 514 * The xas lock has been released, this memcg 515 * can be reparented before us. So reload 516 * memcg id. More details see the comments 517 * in memcg_reparent_list_lrus(). 518 */ 519 index = READ_ONCE(table[i].memcg->kmemcg_id); 520 if (index < 0) 521 xas_set_err(&xas, 0); 522 else if (!xas_error(&xas) && index != xas.xa_index) 523 xas_set(&xas, index); 524 goto retry; 525 } 526 } 527 } 528 /* xas_nomem() is used to free memory instead of memory allocation. 
        if (xas.xa_alloc)
                xas_nomem(&xas, gfp);
        xas_unlock_irqrestore(&xas, flags);
        kfree(table);

        return xas_error(&xas);
}
#else
static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

int __list_lru_init(struct list_lru *lru, bool memcg_aware,
                    struct lock_class_key *key, struct shrinker *shrinker)
{
        int i;

#ifdef CONFIG_MEMCG_KMEM
        if (shrinker)
                lru->shrinker_id = shrinker->id;
        else
                lru->shrinker_id = -1;

        if (mem_cgroup_kmem_disabled())
                memcg_aware = false;
#endif

        lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
        if (!lru->node)
                return -ENOMEM;

        for_each_node(i) {
                spin_lock_init(&lru->node[i].lock);
                if (key)
                        lockdep_set_class(&lru->node[i].lock, key);
                init_one_lru(&lru->node[i].lru);
        }

        memcg_init_list_lru(lru, memcg_aware);
        list_lru_register(lru);

        return 0;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

void list_lru_destroy(struct list_lru *lru)
{
        /* Already destroyed or not yet initialized? */
        if (!lru->node)
                return;

        list_lru_unregister(lru);

        memcg_destroy_list_lru(lru);
        kfree(lru->node);
        lru->node = NULL;

#ifdef CONFIG_MEMCG_KMEM
        lru->shrinker_id = -1;
#endif
}
EXPORT_SYMBOL_GPL(list_lru_destroy);
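
/*
 * Illustrative sketch, kept compiled out: a minimal end-to-end user of the
 * list_lru API (list_lru_init() from <linux/list_lru.h>, list_lru_add_obj(),
 * list_lru_walk_one() with an isolate callback, list_lru_destroy()). The
 * demo_* names and the embedding struct are hypothetical and exist only for
 * this sketch; a real user would typically embed the list_head in its own
 * cached object and drive the walk from a shrinker's ->scan_objects hook.
 */
#if 0
struct demo_obj {
        struct list_head lru;           /* must start empty and stay unused elsewhere */
        void *payload;
};

static enum lru_status demo_isolate(struct list_head *item,
                                    struct list_lru_one *list,
                                    spinlock_t *lock, void *cb_arg)
{
        struct list_head *freeable = cb_arg;

        /* Detach the entry under the per-node lock and hand it to the caller. */
        list_lru_isolate_move(list, item, freeable);
        return LRU_REMOVED;
}

static void demo(void)
{
        unsigned long nr_to_walk = 128;
        struct list_lru demo_lru;
        struct demo_obj *obj;
        LIST_HEAD(freeable);
        int nid;

        /* A plain, non-memcg-aware lru; list_lru_init_memcg() would opt in. */
        if (list_lru_init(&demo_lru))
                return;

        obj = kzalloc(sizeof(*obj), GFP_KERNEL);
        if (!obj)
                goto destroy;

        INIT_LIST_HEAD(&obj->lru);
        /* The NUMA node (and memcg, if aware) are derived from the object. */
        list_lru_add_obj(&demo_lru, &obj->lru);

        /* Walk the node the object landed on, moving entries to a private list. */
        nid = page_to_nid(virt_to_page(obj));
        list_lru_walk_one(&demo_lru, nid, NULL, demo_isolate, &freeable,
                          &nr_to_walk);

        /* Free everything the walk isolated. */
        while (!list_empty(&freeable)) {
                obj = list_first_entry(&freeable, struct demo_obj, lru);
                list_del_init(&obj->lru);
                kfree(obj);
        }
destroy:
        list_lru_destroy(&demo_lru);
}
#endif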