1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved. 4 * Authors: David Chinner and Glauber Costa 5 * 6 * Generic LRU infrastructure 7 */ 8 #include <linux/kernel.h> 9 #include <linux/module.h> 10 #include <linux/mm.h> 11 #include <linux/list_lru.h> 12 #include <linux/slab.h> 13 #include <linux/mutex.h> 14 #include <linux/memcontrol.h> 15 #include "slab.h" 16 #include "internal.h" 17 18 #ifdef CONFIG_MEMCG 19 static LIST_HEAD(memcg_list_lrus); 20 static DEFINE_MUTEX(list_lrus_mutex); 21 22 static inline bool list_lru_memcg_aware(struct list_lru *lru) 23 { 24 return lru->memcg_aware; 25 } 26 27 static void list_lru_register(struct list_lru *lru) 28 { 29 if (!list_lru_memcg_aware(lru)) 30 return; 31 32 mutex_lock(&list_lrus_mutex); 33 list_add(&lru->list, &memcg_list_lrus); 34 mutex_unlock(&list_lrus_mutex); 35 } 36 37 static void list_lru_unregister(struct list_lru *lru) 38 { 39 if (!list_lru_memcg_aware(lru)) 40 return; 41 42 mutex_lock(&list_lrus_mutex); 43 list_del(&lru->list); 44 mutex_unlock(&list_lrus_mutex); 45 } 46 47 static int lru_shrinker_id(struct list_lru *lru) 48 { 49 return lru->shrinker_id; 50 } 51 52 static inline struct list_lru_one * 53 list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx) 54 { 55 if (list_lru_memcg_aware(lru) && idx >= 0) { 56 struct list_lru_memcg *mlru = xa_load(&lru->xa, idx); 57 58 return mlru ? &mlru->node[nid] : NULL; 59 } 60 return &lru->node[nid].lru; 61 } 62 63 static inline bool lock_list_lru(struct list_lru_one *l, bool irq) 64 { 65 if (irq) 66 spin_lock_irq(&l->lock); 67 else 68 spin_lock(&l->lock); 69 if (unlikely(READ_ONCE(l->nr_items) == LONG_MIN)) { 70 if (irq) 71 spin_unlock_irq(&l->lock); 72 else 73 spin_unlock(&l->lock); 74 return false; 75 } 76 return true; 77 } 78 79 static inline struct list_lru_one * 80 lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg, 81 bool irq, bool skip_empty) 82 { 83 struct list_lru_one *l; 84 85 rcu_read_lock(); 86 again: 87 l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg)); 88 if (likely(l) && lock_list_lru(l, irq)) { 89 rcu_read_unlock(); 90 return l; 91 } 92 /* 93 * Caller may simply bail out if raced with reparenting or 94 * may iterate through the list_lru and expect empty slots. 95 */ 96 if (skip_empty) { 97 rcu_read_unlock(); 98 return NULL; 99 } 100 VM_WARN_ON(!css_is_dying(&memcg->css)); 101 memcg = parent_mem_cgroup(memcg); 102 goto again; 103 } 104 105 static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off) 106 { 107 if (irq_off) 108 spin_unlock_irq(&l->lock); 109 else 110 spin_unlock(&l->lock); 111 } 112 #else 113 static void list_lru_register(struct list_lru *lru) 114 { 115 } 116 117 static void list_lru_unregister(struct list_lru *lru) 118 { 119 } 120 121 static int lru_shrinker_id(struct list_lru *lru) 122 { 123 return -1; 124 } 125 126 static inline bool list_lru_memcg_aware(struct list_lru *lru) 127 { 128 return false; 129 } 130 131 static inline struct list_lru_one * 132 list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx) 133 { 134 return &lru->node[nid].lru; 135 } 136 137 static inline struct list_lru_one * 138 lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg, 139 bool irq, bool skip_empty) 140 { 141 struct list_lru_one *l = &lru->node[nid].lru; 142 143 if (irq) 144 spin_lock_irq(&l->lock); 145 else 146 spin_lock(&l->lock); 147 148 return l; 149 } 150 151 static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off) 152 { 153 if (irq_off) 154 spin_unlock_irq(&l->lock); 155 else 156 spin_unlock(&l->lock); 157 } 158 #endif /* CONFIG_MEMCG */ 159 160 /* The caller must ensure the memcg lifetime. */ 161 bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid, 162 struct mem_cgroup *memcg) 163 { 164 struct list_lru_node *nlru = &lru->node[nid]; 165 struct list_lru_one *l; 166 167 l = lock_list_lru_of_memcg(lru, nid, memcg, false, false); 168 if (!l) 169 return false; 170 if (list_empty(item)) { 171 list_add_tail(item, &l->list); 172 /* Set shrinker bit if the first element was added */ 173 if (!l->nr_items++) 174 set_shrinker_bit(memcg, nid, lru_shrinker_id(lru)); 175 unlock_list_lru(l, false); 176 atomic_long_inc(&nlru->nr_items); 177 return true; 178 } 179 unlock_list_lru(l, false); 180 return false; 181 } 182 EXPORT_SYMBOL_GPL(list_lru_add); 183 184 bool list_lru_add_obj(struct list_lru *lru, struct list_head *item) 185 { 186 bool ret; 187 int nid = page_to_nid(virt_to_page(item)); 188 189 if (list_lru_memcg_aware(lru)) { 190 rcu_read_lock(); 191 ret = list_lru_add(lru, item, nid, mem_cgroup_from_virt(item)); 192 rcu_read_unlock(); 193 } else { 194 ret = list_lru_add(lru, item, nid, NULL); 195 } 196 197 return ret; 198 } 199 EXPORT_SYMBOL_GPL(list_lru_add_obj); 200 201 /* The caller must ensure the memcg lifetime. */ 202 bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid, 203 struct mem_cgroup *memcg) 204 { 205 struct list_lru_node *nlru = &lru->node[nid]; 206 struct list_lru_one *l; 207 l = lock_list_lru_of_memcg(lru, nid, memcg, false, false); 208 if (!l) 209 return false; 210 if (!list_empty(item)) { 211 list_del_init(item); 212 l->nr_items--; 213 unlock_list_lru(l, false); 214 atomic_long_dec(&nlru->nr_items); 215 return true; 216 } 217 unlock_list_lru(l, false); 218 return false; 219 } 220 221 bool list_lru_del_obj(struct list_lru *lru, struct list_head *item) 222 { 223 bool ret; 224 int nid = page_to_nid(virt_to_page(item)); 225 226 if (list_lru_memcg_aware(lru)) { 227 rcu_read_lock(); 228 ret = list_lru_del(lru, item, nid, mem_cgroup_from_virt(item)); 229 rcu_read_unlock(); 230 } else { 231 ret = list_lru_del(lru, item, nid, NULL); 232 } 233 234 return ret; 235 } 236 EXPORT_SYMBOL_GPL(list_lru_del_obj); 237 238 void list_lru_isolate(struct list_lru_one *list, struct list_head *item) 239 { 240 list_del_init(item); 241 list->nr_items--; 242 } 243 EXPORT_SYMBOL_GPL(list_lru_isolate); 244 245 void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item, 246 struct list_head *head) 247 { 248 list_move(item, head); 249 list->nr_items--; 250 } 251 EXPORT_SYMBOL_GPL(list_lru_isolate_move); 252 253 unsigned long list_lru_count_one(struct list_lru *lru, 254 int nid, struct mem_cgroup *memcg) 255 { 256 struct list_lru_one *l; 257 long count; 258 259 rcu_read_lock(); 260 l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg)); 261 count = l ? READ_ONCE(l->nr_items) : 0; 262 rcu_read_unlock(); 263 264 if (unlikely(count < 0)) 265 count = 0; 266 267 return count; 268 } 269 EXPORT_SYMBOL_GPL(list_lru_count_one); 270 271 unsigned long list_lru_count_node(struct list_lru *lru, int nid) 272 { 273 struct list_lru_node *nlru; 274 275 nlru = &lru->node[nid]; 276 return atomic_long_read(&nlru->nr_items); 277 } 278 EXPORT_SYMBOL_GPL(list_lru_count_node); 279 280 static unsigned long 281 __list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg, 282 list_lru_walk_cb isolate, void *cb_arg, 283 unsigned long *nr_to_walk, bool irq_off) 284 { 285 struct list_lru_node *nlru = &lru->node[nid]; 286 struct list_lru_one *l = NULL; 287 struct list_head *item, *n; 288 unsigned long isolated = 0; 289 290 restart: 291 l = lock_list_lru_of_memcg(lru, nid, memcg, irq_off, true); 292 if (!l) 293 return isolated; 294 list_for_each_safe(item, n, &l->list) { 295 enum lru_status ret; 296 297 /* 298 * decrement nr_to_walk first so that we don't livelock if we 299 * get stuck on large numbers of LRU_RETRY items 300 */ 301 if (!*nr_to_walk) 302 break; 303 --*nr_to_walk; 304 305 ret = isolate(item, l, cb_arg); 306 switch (ret) { 307 /* 308 * LRU_RETRY, LRU_REMOVED_RETRY and LRU_STOP will drop the lru 309 * lock. List traversal will have to restart from scratch. 310 */ 311 case LRU_RETRY: 312 goto restart; 313 case LRU_REMOVED_RETRY: 314 fallthrough; 315 case LRU_REMOVED: 316 isolated++; 317 atomic_long_dec(&nlru->nr_items); 318 if (ret == LRU_REMOVED_RETRY) 319 goto restart; 320 break; 321 case LRU_ROTATE: 322 list_move_tail(item, &l->list); 323 break; 324 case LRU_SKIP: 325 break; 326 case LRU_STOP: 327 goto out; 328 default: 329 BUG(); 330 } 331 } 332 unlock_list_lru(l, irq_off); 333 out: 334 return isolated; 335 } 336 337 unsigned long 338 list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg, 339 list_lru_walk_cb isolate, void *cb_arg, 340 unsigned long *nr_to_walk) 341 { 342 return __list_lru_walk_one(lru, nid, memcg, isolate, 343 cb_arg, nr_to_walk, false); 344 } 345 EXPORT_SYMBOL_GPL(list_lru_walk_one); 346 347 unsigned long 348 list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg, 349 list_lru_walk_cb isolate, void *cb_arg, 350 unsigned long *nr_to_walk) 351 { 352 return __list_lru_walk_one(lru, nid, memcg, isolate, 353 cb_arg, nr_to_walk, true); 354 } 355 356 unsigned long list_lru_walk_node(struct list_lru *lru, int nid, 357 list_lru_walk_cb isolate, void *cb_arg, 358 unsigned long *nr_to_walk) 359 { 360 long isolated = 0; 361 362 isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg, 363 nr_to_walk); 364 365 #ifdef CONFIG_MEMCG 366 if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) { 367 struct list_lru_memcg *mlru; 368 struct mem_cgroup *memcg; 369 unsigned long index; 370 371 xa_for_each(&lru->xa, index, mlru) { 372 rcu_read_lock(); 373 memcg = mem_cgroup_from_private_id(index); 374 if (!mem_cgroup_tryget(memcg)) { 375 rcu_read_unlock(); 376 continue; 377 } 378 rcu_read_unlock(); 379 isolated += __list_lru_walk_one(lru, nid, memcg, 380 isolate, cb_arg, 381 nr_to_walk, false); 382 mem_cgroup_put(memcg); 383 384 if (*nr_to_walk <= 0) 385 break; 386 } 387 } 388 #endif 389 390 return isolated; 391 } 392 EXPORT_SYMBOL_GPL(list_lru_walk_node); 393 394 static void init_one_lru(struct list_lru *lru, struct list_lru_one *l) 395 { 396 INIT_LIST_HEAD(&l->list); 397 spin_lock_init(&l->lock); 398 l->nr_items = 0; 399 #ifdef CONFIG_LOCKDEP 400 if (lru->key) 401 lockdep_set_class(&l->lock, lru->key); 402 #endif 403 } 404 405 #ifdef CONFIG_MEMCG 406 static struct list_lru_memcg *memcg_init_list_lru_one(struct list_lru *lru, gfp_t gfp) 407 { 408 int nid; 409 struct list_lru_memcg *mlru; 410 411 mlru = kmalloc_flex(*mlru, node, nr_node_ids, gfp); 412 if (!mlru) 413 return NULL; 414 415 for_each_node(nid) 416 init_one_lru(lru, &mlru->node[nid]); 417 418 return mlru; 419 } 420 421 static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware) 422 { 423 if (memcg_aware) 424 xa_init_flags(&lru->xa, XA_FLAGS_LOCK_IRQ); 425 lru->memcg_aware = memcg_aware; 426 } 427 428 static void memcg_destroy_list_lru(struct list_lru *lru) 429 { 430 XA_STATE(xas, &lru->xa, 0); 431 struct list_lru_memcg *mlru; 432 433 if (!list_lru_memcg_aware(lru)) 434 return; 435 436 xas_lock_irq(&xas); 437 xas_for_each(&xas, mlru, ULONG_MAX) { 438 kfree(mlru); 439 xas_store(&xas, NULL); 440 } 441 xas_unlock_irq(&xas); 442 } 443 444 static void memcg_reparent_list_lru_one(struct list_lru *lru, int nid, 445 struct list_lru_one *src, 446 struct mem_cgroup *dst_memcg) 447 { 448 int dst_idx = dst_memcg->kmemcg_id; 449 struct list_lru_one *dst; 450 451 spin_lock_irq(&src->lock); 452 dst = list_lru_from_memcg_idx(lru, nid, dst_idx); 453 spin_lock_nested(&dst->lock, SINGLE_DEPTH_NESTING); 454 455 list_splice_init(&src->list, &dst->list); 456 if (src->nr_items) { 457 WARN_ON(src->nr_items < 0); 458 dst->nr_items += src->nr_items; 459 set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru)); 460 } 461 /* Mark the list_lru_one dead */ 462 src->nr_items = LONG_MIN; 463 464 spin_unlock(&dst->lock); 465 spin_unlock_irq(&src->lock); 466 } 467 468 void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent) 469 { 470 struct list_lru *lru; 471 int i; 472 473 mutex_lock(&list_lrus_mutex); 474 list_for_each_entry(lru, &memcg_list_lrus, list) { 475 struct list_lru_memcg *mlru; 476 477 /* 478 * css_is_dying() check in memcg_list_lru_alloc() avoids 479 * allocating a new mlru since CSS_DYING is already set for this 480 * memcg a rcu grace period ago. 481 */ 482 mlru = xa_load(&lru->xa, memcg->kmemcg_id); 483 if (!mlru) 484 continue; 485 486 /* 487 * Reparent each per-node list and mark the child dead 488 * (LONG_MIN) before clearing xarray entry otherwise a 489 * concurrent list_lru_del() may corrupt the list if it arrives 490 * after xarray clear but before reparenting as 491 * lock_list_lru_of_memcg will acquire parent's lock while the 492 * item is still on child's list. 493 */ 494 for_each_node(i) 495 memcg_reparent_list_lru_one(lru, i, &mlru->node[i], parent); 496 497 xa_erase_irq(&lru->xa, memcg->kmemcg_id); 498 499 /* 500 * Here all list_lrus corresponding to the cgroup are guaranteed 501 * to remain empty, we can safely free this lru, any further 502 * memcg_list_lru_alloc() call will simply bail out. 503 */ 504 kvfree_rcu(mlru, rcu); 505 } 506 mutex_unlock(&list_lrus_mutex); 507 } 508 509 static inline bool memcg_list_lru_allocated(struct mem_cgroup *memcg, 510 struct list_lru *lru) 511 { 512 int idx = memcg->kmemcg_id; 513 514 return idx < 0 || xa_load(&lru->xa, idx); 515 } 516 517 int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru, 518 gfp_t gfp) 519 { 520 unsigned long flags; 521 struct list_lru_memcg *mlru = NULL; 522 struct mem_cgroup *pos, *parent; 523 XA_STATE(xas, &lru->xa, 0); 524 525 if (!list_lru_memcg_aware(lru) || memcg_list_lru_allocated(memcg, lru)) 526 return 0; 527 528 gfp &= GFP_RECLAIM_MASK; 529 /* 530 * Because the list_lru can be reparented to the parent cgroup's 531 * list_lru, we should make sure that this cgroup and all its 532 * ancestors have allocated list_lru_memcg. 533 */ 534 do { 535 /* 536 * Keep finding the farest parent that wasn't populated 537 * until found memcg itself. 538 */ 539 pos = memcg; 540 parent = parent_mem_cgroup(pos); 541 while (!memcg_list_lru_allocated(parent, lru)) { 542 pos = parent; 543 parent = parent_mem_cgroup(pos); 544 } 545 546 if (!mlru) { 547 mlru = memcg_init_list_lru_one(lru, gfp); 548 if (!mlru) 549 return -ENOMEM; 550 } 551 xas_set(&xas, pos->kmemcg_id); 552 do { 553 xas_lock_irqsave(&xas, flags); 554 if (!xas_load(&xas) && !css_is_dying(&pos->css)) { 555 xas_store(&xas, mlru); 556 if (!xas_error(&xas)) 557 mlru = NULL; 558 } 559 xas_unlock_irqrestore(&xas, flags); 560 } while (xas_nomem(&xas, gfp)); 561 } while (pos != memcg && !css_is_dying(&pos->css)); 562 563 if (unlikely(mlru)) 564 kfree(mlru); 565 566 return xas_error(&xas); 567 } 568 #else 569 static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware) 570 { 571 } 572 573 static void memcg_destroy_list_lru(struct list_lru *lru) 574 { 575 } 576 #endif /* CONFIG_MEMCG */ 577 578 int __list_lru_init(struct list_lru *lru, bool memcg_aware, struct shrinker *shrinker) 579 { 580 int i; 581 582 #ifdef CONFIG_MEMCG 583 if (shrinker) 584 lru->shrinker_id = shrinker->id; 585 else 586 lru->shrinker_id = -1; 587 588 if (mem_cgroup_kmem_disabled()) 589 memcg_aware = false; 590 #endif 591 592 lru->node = kzalloc_objs(*lru->node, nr_node_ids); 593 if (!lru->node) 594 return -ENOMEM; 595 596 for_each_node(i) 597 init_one_lru(lru, &lru->node[i].lru); 598 599 memcg_init_list_lru(lru, memcg_aware); 600 list_lru_register(lru); 601 602 return 0; 603 } 604 EXPORT_SYMBOL_GPL(__list_lru_init); 605 606 void list_lru_destroy(struct list_lru *lru) 607 { 608 /* Already destroyed or not yet initialized? */ 609 if (!lru->node) 610 return; 611 612 list_lru_unregister(lru); 613 614 memcg_destroy_list_lru(lru); 615 kfree(lru->node); 616 lru->node = NULL; 617 618 #ifdef CONFIG_MEMCG 619 lru->shrinker_id = -1; 620 #endif 621 } 622 EXPORT_SYMBOL_GPL(list_lru_destroy); 623