// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>
#include "slab.h"
#include "internal.h"

#ifdef CONFIG_MEMCG
static LIST_HEAD(memcg_list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
        return lru->memcg_aware;
}

static void list_lru_register(struct list_lru *lru)
{
        if (!list_lru_memcg_aware(lru))
                return;

        mutex_lock(&list_lrus_mutex);
        list_add(&lru->list, &memcg_list_lrus);
        mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
        if (!list_lru_memcg_aware(lru))
                return;

        mutex_lock(&list_lrus_mutex);
        list_del(&lru->list);
        mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
        return lru->shrinker_id;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
        if (list_lru_memcg_aware(lru) && idx >= 0) {
                struct list_lru_memcg *mlru = xa_load(&lru->xa, idx);

                return mlru ? &mlru->node[nid] : NULL;
        }
        return &lru->node[nid].lru;
}

static inline struct list_lru_one *
lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
                       bool irq, bool skip_empty)
{
        struct list_lru_one *l;
        long nr_items;

        rcu_read_lock();
again:
        l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
        if (likely(l)) {
                if (irq)
                        spin_lock_irq(&l->lock);
                else
                        spin_lock(&l->lock);
                nr_items = READ_ONCE(l->nr_items);
                if (likely(nr_items != LONG_MIN)) {
                        rcu_read_unlock();
                        return l;
                }
                if (irq)
                        spin_unlock_irq(&l->lock);
                else
                        spin_unlock(&l->lock);
        }
        /*
         * Caller may simply bail out if raced with reparenting or
         * may iterate through the list_lru and expect empty slots.
         */
        if (skip_empty) {
                rcu_read_unlock();
                return NULL;
        }
        VM_WARN_ON(!css_is_dying(&memcg->css));
        memcg = parent_mem_cgroup(memcg);
        goto again;
}

static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
{
        if (irq_off)
                spin_unlock_irq(&l->lock);
        else
                spin_unlock(&l->lock);
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
        return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
        return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
        return &lru->node[nid].lru;
}

static inline struct list_lru_one *
lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
                       bool irq, bool skip_empty)
{
        struct list_lru_one *l = &lru->node[nid].lru;

        if (irq)
                spin_lock_irq(&l->lock);
        else
                spin_lock(&l->lock);

        return l;
}

static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
{
        if (irq_off)
                spin_unlock_irq(&l->lock);
        else
                spin_unlock(&l->lock);
}
#endif /* CONFIG_MEMCG */
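
/*
 * Usage sketch (illustrative only, not part of this API): a cache embeds a
 * struct list_head in its objects and hands them to the *_obj() helpers
 * below, which derive the NUMA node and (for memcg-aware lrus) the memcg
 * from the object's address. The structure and field names here are
 * hypothetical.
 *
 *      struct my_cache_object {
 *              struct list_head lru;   // initialised and empty while off the LRU
 *      };
 *
 *      // the object became unused:
 *      list_lru_add_obj(&my_lru, &obj->lru);
 *
 *      // the object is being reused or freed:
 *      list_lru_del_obj(&my_lru, &obj->lru);
 *
 * list_lru_add() returns true only if the item was actually added (its
 * list_head was empty), and list_lru_del() returns true only if the item
 * was actually removed.
 */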

/* The caller must ensure the memcg lifetime. */
bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
                  struct mem_cgroup *memcg)
{
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l;

        l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
        if (!l)
                return false;
        if (list_empty(item)) {
                list_add_tail(item, &l->list);
                /* Set shrinker bit if the first element was added */
                if (!l->nr_items++)
                        set_shrinker_bit(memcg, nid, lru_shrinker_id(lru));
                unlock_list_lru(l, false);
                atomic_long_inc(&nlru->nr_items);
                return true;
        }
        unlock_list_lru(l, false);
        return false;
}

bool list_lru_add_obj(struct list_lru *lru, struct list_head *item)
{
        bool ret;
        int nid = page_to_nid(virt_to_page(item));

        if (list_lru_memcg_aware(lru)) {
                rcu_read_lock();
                ret = list_lru_add(lru, item, nid, mem_cgroup_from_slab_obj(item));
                rcu_read_unlock();
        } else {
                ret = list_lru_add(lru, item, nid, NULL);
        }

        return ret;
}
EXPORT_SYMBOL_GPL(list_lru_add_obj);

/* The caller must ensure the memcg lifetime. */
bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid,
                  struct mem_cgroup *memcg)
{
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l;

        l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
        if (!l)
                return false;
        if (!list_empty(item)) {
                list_del_init(item);
                l->nr_items--;
                unlock_list_lru(l, false);
                atomic_long_dec(&nlru->nr_items);
                return true;
        }
        unlock_list_lru(l, false);
        return false;
}

bool list_lru_del_obj(struct list_lru *lru, struct list_head *item)
{
        bool ret;
        int nid = page_to_nid(virt_to_page(item));

        if (list_lru_memcg_aware(lru)) {
                rcu_read_lock();
                ret = list_lru_del(lru, item, nid, mem_cgroup_from_slab_obj(item));
                rcu_read_unlock();
        } else {
                ret = list_lru_del(lru, item, nid, NULL);
        }

        return ret;
}
EXPORT_SYMBOL_GPL(list_lru_del_obj);
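
/*
 * Illustrative isolate callback for the list_lru_walk*() functions below
 * (the object type and helpers are hypothetical). The callback is invoked
 * with the list_lru_one lock held, uses list_lru_isolate() or
 * list_lru_isolate_move() to take the item off the list, and steers the
 * walker through the enum lru_status values handled in __list_lru_walk_one():
 *
 *      static enum lru_status my_isolate(struct list_head *item,
 *                                        struct list_lru_one *list,
 *                                        void *cb_arg)
 *      {
 *              struct my_object *obj = container_of(item, struct my_object, lru);
 *              struct list_head *dispose = cb_arg;
 *
 *              if (my_object_is_busy(obj))
 *                      return LRU_SKIP;        // leave it on the list
 *              if (my_object_referenced(obj)) {
 *                      my_object_clear_referenced(obj);
 *                      return LRU_ROTATE;      // move it to the tail
 *              }
 *              list_lru_isolate_move(list, item, dispose);
 *              return LRU_REMOVED;             // walker updates the node counter
 *      }
 */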

void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
        list_del_init(item);
        list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
                           struct list_head *head)
{
        list_move(item, head);
        list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

unsigned long list_lru_count_one(struct list_lru *lru,
                                 int nid, struct mem_cgroup *memcg)
{
        struct list_lru_one *l;
        long count;

        rcu_read_lock();
        l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
        count = l ? READ_ONCE(l->nr_items) : 0;
        rcu_read_unlock();

        if (unlikely(count < 0))
                count = 0;

        return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
        struct list_lru_node *nlru;

        nlru = &lru->node[nid];
        return atomic_long_read(&nlru->nr_items);
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

static unsigned long
__list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
                    list_lru_walk_cb isolate, void *cb_arg,
                    unsigned long *nr_to_walk, bool irq_off)
{
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l = NULL;
        struct list_head *item, *n;
        unsigned long isolated = 0;

restart:
        l = lock_list_lru_of_memcg(lru, nid, memcg, irq_off, true);
        if (!l)
                return isolated;
        list_for_each_safe(item, n, &l->list) {
                enum lru_status ret;

                /*
                 * decrement nr_to_walk first so that we don't livelock if we
                 * get stuck on large numbers of LRU_RETRY items
                 */
                if (!*nr_to_walk)
                        break;
                --*nr_to_walk;

                ret = isolate(item, l, cb_arg);
                switch (ret) {
                /*
                 * LRU_RETRY, LRU_REMOVED_RETRY and LRU_STOP will drop the lru
                 * lock. List traversal will have to restart from scratch.
                 */
                case LRU_RETRY:
                        goto restart;
                case LRU_REMOVED_RETRY:
                        fallthrough;
                case LRU_REMOVED:
                        isolated++;
                        atomic_long_dec(&nlru->nr_items);
                        if (ret == LRU_REMOVED_RETRY)
                                goto restart;
                        break;
                case LRU_ROTATE:
                        list_move_tail(item, &l->list);
                        break;
                case LRU_SKIP:
                        break;
                case LRU_STOP:
                        goto out;
                default:
                        BUG();
                }
        }
        unlock_list_lru(l, irq_off);
out:
        return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
                  list_lru_walk_cb isolate, void *cb_arg,
                  unsigned long *nr_to_walk)
{
        return __list_lru_walk_one(lru, nid, memcg, isolate,
                                   cb_arg, nr_to_walk, false);
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

unsigned long
list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
                      list_lru_walk_cb isolate, void *cb_arg,
                      unsigned long *nr_to_walk)
{
        return __list_lru_walk_one(lru, nid, memcg, isolate,
                                   cb_arg, nr_to_walk, true);
}

unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
                                 list_lru_walk_cb isolate, void *cb_arg,
                                 unsigned long *nr_to_walk)
{
        long isolated = 0;

        isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
                                      nr_to_walk);

#ifdef CONFIG_MEMCG
        if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
                struct list_lru_memcg *mlru;
                struct mem_cgroup *memcg;
                unsigned long index;

                xa_for_each(&lru->xa, index, mlru) {
                        rcu_read_lock();
                        memcg = mem_cgroup_from_id(index);
                        if (!mem_cgroup_tryget(memcg)) {
                                rcu_read_unlock();
                                continue;
                        }
                        rcu_read_unlock();
                        isolated += __list_lru_walk_one(lru, nid, memcg,
                                                        isolate, cb_arg,
                                                        nr_to_walk, false);
                        mem_cgroup_put(memcg);

                        if (*nr_to_walk <= 0)
                                break;
                }
        }
#endif

        return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);

static void init_one_lru(struct list_lru *lru, struct list_lru_one *l)
{
        INIT_LIST_HEAD(&l->list);
        spin_lock_init(&l->lock);
        l->nr_items = 0;
#ifdef CONFIG_LOCKDEP
        if (lru->key)
                lockdep_set_class(&l->lock, lru->key);
#endif
}

#ifdef CONFIG_MEMCG
static struct list_lru_memcg *memcg_init_list_lru_one(struct list_lru *lru, gfp_t gfp)
{
        int nid;
        struct list_lru_memcg *mlru;

        mlru = kmalloc(struct_size(mlru, node, nr_node_ids), gfp);
        if (!mlru)
                return NULL;

        for_each_node(nid)
                init_one_lru(lru, &mlru->node[nid]);

        return mlru;
}

static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
        if (memcg_aware)
                xa_init_flags(&lru->xa, XA_FLAGS_LOCK_IRQ);
        lru->memcg_aware = memcg_aware;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
        XA_STATE(xas, &lru->xa, 0);
        struct list_lru_memcg *mlru;

        if (!list_lru_memcg_aware(lru))
                return;

        xas_lock_irq(&xas);
        xas_for_each(&xas, mlru, ULONG_MAX) {
                kfree(mlru);
                xas_store(&xas, NULL);
        }
        xas_unlock_irq(&xas);
}

static void memcg_reparent_list_lru_one(struct list_lru *lru, int nid,
                                        struct list_lru_one *src,
                                        struct mem_cgroup *dst_memcg)
{
        int dst_idx = dst_memcg->kmemcg_id;
        struct list_lru_one *dst;

        spin_lock_irq(&src->lock);
        dst = list_lru_from_memcg_idx(lru, nid, dst_idx);
        spin_lock_nested(&dst->lock, SINGLE_DEPTH_NESTING);

        list_splice_init(&src->list, &dst->list);
        if (src->nr_items) {
                WARN_ON(src->nr_items < 0);
                dst->nr_items += src->nr_items;
                set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
        }
        /* Mark the list_lru_one dead */
        src->nr_items = LONG_MIN;

        spin_unlock(&dst->lock);
        spin_unlock_irq(&src->lock);
}

void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent)
{
        struct list_lru *lru;
        int i;

        mutex_lock(&list_lrus_mutex);
        list_for_each_entry(lru, &memcg_list_lrus, list) {
                struct list_lru_memcg *mlru;
                XA_STATE(xas, &lru->xa, memcg->kmemcg_id);

                /*
                 * Lock the XArray to ensure that no list_lru_memcg allocation
                 * is in flight and that any further allocation will see
                 * css_is_dying().
                 */
                xas_lock_irq(&xas);
                mlru = xas_store(&xas, NULL);
                xas_unlock_irq(&xas);
                if (!mlru)
                        continue;

                /*
                 * With the XArray entry set to NULL, holding the lru lock
                 * below prevents list_lru_{add,del,isolate} from touching the
                 * lru, so it is safe to reparent.
                 */
                for_each_node(i)
                        memcg_reparent_list_lru_one(lru, i, &mlru->node[i], parent);

                /*
                 * Here all list_lrus corresponding to the cgroup are guaranteed
                 * to remain empty, so we can safely free this lru; any further
                 * memcg_list_lru_alloc() call will simply bail out.
                 */
                kvfree_rcu(mlru, rcu);
        }
        mutex_unlock(&list_lrus_mutex);
}

static inline bool memcg_list_lru_allocated(struct mem_cgroup *memcg,
                                            struct list_lru *lru)
{
        int idx = memcg->kmemcg_id;

        return idx < 0 || xa_load(&lru->xa, idx);
}

int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
                         gfp_t gfp)
{
        unsigned long flags;
        struct list_lru_memcg *mlru;
        struct mem_cgroup *pos, *parent;
        XA_STATE(xas, &lru->xa, 0);

        if (!list_lru_memcg_aware(lru) || memcg_list_lru_allocated(memcg, lru))
                return 0;

        gfp &= GFP_RECLAIM_MASK;
        /*
         * Because the list_lru can be reparented to the parent cgroup's
         * list_lru, we should make sure that this cgroup and all its
         * ancestors have allocated list_lru_memcg.
         */
        do {
                /*
                 * Keep finding the farthest parent that wasn't populated,
                 * until memcg itself is found.
                 */
                pos = memcg;
                parent = parent_mem_cgroup(pos);
                while (!memcg_list_lru_allocated(parent, lru)) {
                        pos = parent;
                        parent = parent_mem_cgroup(pos);
                }

                mlru = memcg_init_list_lru_one(lru, gfp);
                if (!mlru)
                        return -ENOMEM;
                xas_set(&xas, pos->kmemcg_id);
                do {
                        xas_lock_irqsave(&xas, flags);
                        if (!xas_load(&xas) && !css_is_dying(&pos->css)) {
                                xas_store(&xas, mlru);
                                if (!xas_error(&xas))
                                        mlru = NULL;
                        }
                        xas_unlock_irqrestore(&xas, flags);
                } while (xas_nomem(&xas, gfp));
                if (mlru)
                        kfree(mlru);
        } while (pos != memcg && !css_is_dying(&pos->css));

        return xas_error(&xas);
}
#else
static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG */
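
/*
 * Initialisation sketch (illustrative; the shrinker wiring is an assumption
 * about a typical memcg-aware user, not a requirement of this file). Most
 * callers use the list_lru_init()/list_lru_init_memcg() wrappers from
 * <linux/list_lru.h>, which end up in __list_lru_init() below:
 *
 *      shrinker = shrinker_alloc(SHRINKER_MEMCG_AWARE | SHRINKER_NUMA_AWARE,
 *                                "my-cache");
 *      if (!shrinker)
 *              return -ENOMEM;
 *      if (list_lru_init_memcg(&my_lru, shrinker)) {
 *              shrinker_free(shrinker);
 *              return -ENOMEM;
 *      }
 *      shrinker->count_objects = my_count;     // e.g. list_lru_shrink_count()
 *      shrinker->scan_objects = my_scan;       // e.g. list_lru_shrink_walk()
 *      shrinker_register(shrinker);
 *
 * Teardown is the reverse: shrinker_free() the shrinker, then call
 * list_lru_destroy().
 */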

int __list_lru_init(struct list_lru *lru, bool memcg_aware, struct shrinker *shrinker)
{
        int i;

#ifdef CONFIG_MEMCG
        if (shrinker)
                lru->shrinker_id = shrinker->id;
        else
                lru->shrinker_id = -1;

        if (mem_cgroup_kmem_disabled())
                memcg_aware = false;
#endif

        lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
        if (!lru->node)
                return -ENOMEM;

        for_each_node(i)
                init_one_lru(lru, &lru->node[i].lru);

        memcg_init_list_lru(lru, memcg_aware);
        list_lru_register(lru);

        return 0;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

void list_lru_destroy(struct list_lru *lru)
{
        /* Already destroyed or not yet initialized? */
        if (!lru->node)
                return;

        list_lru_unregister(lru);

        memcg_destroy_list_lru(lru);
        kfree(lru->node);
        lru->node = NULL;

#ifdef CONFIG_MEMCG
        lru->shrinker_id = -1;
#endif
}
EXPORT_SYMBOL_GPL(list_lru_destroy);
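
/*
 * Shrinker-callback sketch (illustrative; my_lru, my_isolate and
 * my_dispose_list are hypothetical). A memcg- and NUMA-aware shrinker
 * usually forwards the node and memcg carried in struct shrink_control via
 * the list_lru_shrink_count()/list_lru_shrink_walk() helpers from
 * <linux/list_lru.h>:
 *
 *      static unsigned long my_count(struct shrinker *s,
 *                                    struct shrink_control *sc)
 *      {
 *              return list_lru_shrink_count(&my_lru, sc);
 *      }
 *
 *      static unsigned long my_scan(struct shrinker *s,
 *                                   struct shrink_control *sc)
 *      {
 *              LIST_HEAD(dispose);
 *              unsigned long freed;
 *
 *              freed = list_lru_shrink_walk(&my_lru, sc, my_isolate, &dispose);
 *              my_dispose_list(&dispose);      // free outside the lru lock
 *              return freed;
 *      }
 */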