// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>
#include "slab.h"
#include "internal.h"

#ifdef CONFIG_MEMCG
static LIST_HEAD(memcg_list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return lru->memcg_aware;
}

static void list_lru_register(struct list_lru *lru)
{
	if (!list_lru_memcg_aware(lru))
		return;

	mutex_lock(&list_lrus_mutex);
	list_add(&lru->list, &memcg_list_lrus);
	mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
	if (!list_lru_memcg_aware(lru))
		return;

	mutex_lock(&list_lrus_mutex);
	list_del(&lru->list);
	mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return lru->shrinker_id;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
	if (list_lru_memcg_aware(lru) && idx >= 0) {
		struct list_lru_memcg *mlru = xa_load(&lru->xa, idx);

		return mlru ? &mlru->node[nid] : NULL;
	}
	return &lru->node[nid].lru;
}

static inline bool lock_list_lru(struct list_lru_one *l, bool irq)
{
	if (irq)
		spin_lock_irq(&l->lock);
	else
		spin_lock(&l->lock);
	if (unlikely(READ_ONCE(l->nr_items) == LONG_MIN)) {
		if (irq)
			spin_unlock_irq(&l->lock);
		else
			spin_unlock(&l->lock);
		return false;
	}
	return true;
}

static inline struct list_lru_one *
lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		       bool irq, bool skip_empty)
{
	struct list_lru_one *l;

	rcu_read_lock();
again:
	l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
	if (likely(l) && lock_list_lru(l, irq)) {
		rcu_read_unlock();
		return l;
	}
	/*
	 * Caller may simply bail out if raced with reparenting or
	 * may iterate through the list_lru and expect empty slots.
	 */
	if (skip_empty) {
		rcu_read_unlock();
		return NULL;
	}
	VM_WARN_ON(!css_is_dying(&memcg->css));
	memcg = parent_mem_cgroup(memcg);
	goto again;
}

static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
{
	if (irq_off)
		spin_unlock_irq(&l->lock);
	else
		spin_unlock(&l->lock);
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
	return &lru->node[nid].lru;
}

static inline struct list_lru_one *
lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		       bool irq, bool skip_empty)
{
	struct list_lru_one *l = &lru->node[nid].lru;

	if (irq)
		spin_lock_irq(&l->lock);
	else
		spin_lock(&l->lock);

	return l;
}

static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
{
	if (irq_off)
		spin_unlock_irq(&l->lock);
	else
		spin_unlock(&l->lock);
}
#endif /* CONFIG_MEMCG */

/* The caller must ensure the memcg lifetime. */
bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
		  struct mem_cgroup *memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
	if (!l)
		return false;
	if (list_empty(item)) {
		list_add_tail(item, &l->list);
		/* Set shrinker bit if the first element was added */
		if (!l->nr_items++)
			set_shrinker_bit(memcg, nid, lru_shrinker_id(lru));
		unlock_list_lru(l, false);
		atomic_long_inc(&nlru->nr_items);
		return true;
	}
	unlock_list_lru(l, false);
	return false;
}

bool list_lru_add_obj(struct list_lru *lru, struct list_head *item)
{
	bool ret;
	int nid = page_to_nid(virt_to_page(item));

	if (list_lru_memcg_aware(lru)) {
		rcu_read_lock();
		ret = list_lru_add(lru, item, nid, mem_cgroup_from_slab_obj(item));
		rcu_read_unlock();
	} else {
		ret = list_lru_add(lru, item, nid, NULL);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(list_lru_add_obj);
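
/*
 * Illustrative sketch (not part of this file): how a cache might hand its
 * idle objects to a list_lru via the *_obj helpers above.  The names
 * my_object, my_cache_lru, my_object_make_idle and my_object_reuse are
 * hypothetical and exist only for this example.
 *
 *	struct my_object {
 *		struct list_head lru;	// empty while the object is in use
 *		// ... cache payload ...
 *	};
 *
 *	static struct list_lru my_cache_lru;	// set up with __list_lru_init()
 *
 *	static void my_object_make_idle(struct my_object *obj)
 *	{
 *		// Returns false if the item was already on a list.
 *		list_lru_add_obj(&my_cache_lru, &obj->lru);
 *	}
 *
 *	static void my_object_reuse(struct my_object *obj)
 *	{
 *		// Returns false if the item was not on a list.
 *		list_lru_del_obj(&my_cache_lru, &obj->lru);
 *	}
 *
 * The *_obj variants derive the node id (and, for memcg-aware lrus, the
 * memcg) from the object address, which assumes slab-allocated objects.
 */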

/* The caller must ensure the memcg lifetime. */
bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid,
		  struct mem_cgroup *memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
	if (!l)
		return false;
	if (!list_empty(item)) {
		list_del_init(item);
		l->nr_items--;
		unlock_list_lru(l, false);
		atomic_long_dec(&nlru->nr_items);
		return true;
	}
	unlock_list_lru(l, false);
	return false;
}

bool list_lru_del_obj(struct list_lru *lru, struct list_head *item)
{
	bool ret;
	int nid = page_to_nid(virt_to_page(item));

	if (list_lru_memcg_aware(lru)) {
		rcu_read_lock();
		ret = list_lru_del(lru, item, nid, mem_cgroup_from_slab_obj(item));
		rcu_read_unlock();
	} else {
		ret = list_lru_del(lru, item, nid, NULL);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(list_lru_del_obj);

void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
	list_del_init(item);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head)
{
	list_move(item, head);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	struct list_lru_one *l;
	long count;

	rcu_read_lock();
	l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
	count = l ? READ_ONCE(l->nr_items) : 0;
	rcu_read_unlock();

	if (unlikely(count < 0))
		count = 0;

	return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
	struct list_lru_node *nlru;

	nlru = &lru->node[nid];
	return atomic_long_read(&nlru->nr_items);
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

static unsigned long
__list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		    list_lru_walk_cb isolate, void *cb_arg,
		    unsigned long *nr_to_walk, bool irq_off)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l = NULL;
	struct list_head *item, *n;
	unsigned long isolated = 0;

restart:
	l = lock_list_lru_of_memcg(lru, nid, memcg, irq_off, true);
	if (!l)
		return isolated;
	list_for_each_safe(item, n, &l->list) {
		enum lru_status ret;

		/*
		 * decrement nr_to_walk first so that we don't livelock if we
		 * get stuck on large numbers of LRU_RETRY items
		 */
		if (!*nr_to_walk)
			break;
		--*nr_to_walk;

		ret = isolate(item, l, cb_arg);
		switch (ret) {
		/*
		 * LRU_RETRY, LRU_REMOVED_RETRY and LRU_STOP will drop the lru
		 * lock. List traversal will have to restart from scratch.
309 */ 310 case LRU_RETRY: 311 goto restart; 312 case LRU_REMOVED_RETRY: 313 fallthrough; 314 case LRU_REMOVED: 315 isolated++; 316 atomic_long_dec(&nlru->nr_items); 317 if (ret == LRU_REMOVED_RETRY) 318 goto restart; 319 break; 320 case LRU_ROTATE: 321 list_move_tail(item, &l->list); 322 break; 323 case LRU_SKIP: 324 break; 325 case LRU_STOP: 326 goto out; 327 default: 328 BUG(); 329 } 330 } 331 unlock_list_lru(l, irq_off); 332 out: 333 return isolated; 334 } 335 336 unsigned long 337 list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg, 338 list_lru_walk_cb isolate, void *cb_arg, 339 unsigned long *nr_to_walk) 340 { 341 return __list_lru_walk_one(lru, nid, memcg, isolate, 342 cb_arg, nr_to_walk, false); 343 } 344 EXPORT_SYMBOL_GPL(list_lru_walk_one); 345 346 unsigned long 347 list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg, 348 list_lru_walk_cb isolate, void *cb_arg, 349 unsigned long *nr_to_walk) 350 { 351 return __list_lru_walk_one(lru, nid, memcg, isolate, 352 cb_arg, nr_to_walk, true); 353 } 354 355 unsigned long list_lru_walk_node(struct list_lru *lru, int nid, 356 list_lru_walk_cb isolate, void *cb_arg, 357 unsigned long *nr_to_walk) 358 { 359 long isolated = 0; 360 361 isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg, 362 nr_to_walk); 363 364 #ifdef CONFIG_MEMCG 365 if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) { 366 struct list_lru_memcg *mlru; 367 struct mem_cgroup *memcg; 368 unsigned long index; 369 370 xa_for_each(&lru->xa, index, mlru) { 371 rcu_read_lock(); 372 memcg = mem_cgroup_from_id(index); 373 if (!mem_cgroup_tryget(memcg)) { 374 rcu_read_unlock(); 375 continue; 376 } 377 rcu_read_unlock(); 378 isolated += __list_lru_walk_one(lru, nid, memcg, 379 isolate, cb_arg, 380 nr_to_walk, false); 381 mem_cgroup_put(memcg); 382 383 if (*nr_to_walk <= 0) 384 break; 385 } 386 } 387 #endif 388 389 return isolated; 390 } 391 EXPORT_SYMBOL_GPL(list_lru_walk_node); 392 393 static void init_one_lru(struct list_lru *lru, struct list_lru_one *l) 394 { 395 INIT_LIST_HEAD(&l->list); 396 spin_lock_init(&l->lock); 397 l->nr_items = 0; 398 #ifdef CONFIG_LOCKDEP 399 if (lru->key) 400 lockdep_set_class(&l->lock, lru->key); 401 #endif 402 } 403 404 #ifdef CONFIG_MEMCG 405 static struct list_lru_memcg *memcg_init_list_lru_one(struct list_lru *lru, gfp_t gfp) 406 { 407 int nid; 408 struct list_lru_memcg *mlru; 409 410 mlru = kmalloc(struct_size(mlru, node, nr_node_ids), gfp); 411 if (!mlru) 412 return NULL; 413 414 for_each_node(nid) 415 init_one_lru(lru, &mlru->node[nid]); 416 417 return mlru; 418 } 419 420 static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware) 421 { 422 if (memcg_aware) 423 xa_init_flags(&lru->xa, XA_FLAGS_LOCK_IRQ); 424 lru->memcg_aware = memcg_aware; 425 } 426 427 static void memcg_destroy_list_lru(struct list_lru *lru) 428 { 429 XA_STATE(xas, &lru->xa, 0); 430 struct list_lru_memcg *mlru; 431 432 if (!list_lru_memcg_aware(lru)) 433 return; 434 435 xas_lock_irq(&xas); 436 xas_for_each(&xas, mlru, ULONG_MAX) { 437 kfree(mlru); 438 xas_store(&xas, NULL); 439 } 440 xas_unlock_irq(&xas); 441 } 442 443 static void memcg_reparent_list_lru_one(struct list_lru *lru, int nid, 444 struct list_lru_one *src, 445 struct mem_cgroup *dst_memcg) 446 { 447 int dst_idx = dst_memcg->kmemcg_id; 448 struct list_lru_one *dst; 449 450 spin_lock_irq(&src->lock); 451 dst = list_lru_from_memcg_idx(lru, nid, dst_idx); 452 spin_lock_nested(&dst->lock, SINGLE_DEPTH_NESTING); 453 454 list_splice_init(&src->list, 

static void init_one_lru(struct list_lru *lru, struct list_lru_one *l)
{
	INIT_LIST_HEAD(&l->list);
	spin_lock_init(&l->lock);
	l->nr_items = 0;
#ifdef CONFIG_LOCKDEP
	if (lru->key)
		lockdep_set_class(&l->lock, lru->key);
#endif
}

#ifdef CONFIG_MEMCG
static struct list_lru_memcg *memcg_init_list_lru_one(struct list_lru *lru, gfp_t gfp)
{
	int nid;
	struct list_lru_memcg *mlru;

	mlru = kmalloc(struct_size(mlru, node, nr_node_ids), gfp);
	if (!mlru)
		return NULL;

	for_each_node(nid)
		init_one_lru(lru, &mlru->node[nid]);

	return mlru;
}

static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	if (memcg_aware)
		xa_init_flags(&lru->xa, XA_FLAGS_LOCK_IRQ);
	lru->memcg_aware = memcg_aware;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
	XA_STATE(xas, &lru->xa, 0);
	struct list_lru_memcg *mlru;

	if (!list_lru_memcg_aware(lru))
		return;

	xas_lock_irq(&xas);
	xas_for_each(&xas, mlru, ULONG_MAX) {
		kfree(mlru);
		xas_store(&xas, NULL);
	}
	xas_unlock_irq(&xas);
}

static void memcg_reparent_list_lru_one(struct list_lru *lru, int nid,
					struct list_lru_one *src,
					struct mem_cgroup *dst_memcg)
{
	int dst_idx = dst_memcg->kmemcg_id;
	struct list_lru_one *dst;

	spin_lock_irq(&src->lock);
	dst = list_lru_from_memcg_idx(lru, nid, dst_idx);
	spin_lock_nested(&dst->lock, SINGLE_DEPTH_NESTING);

	list_splice_init(&src->list, &dst->list);
	if (src->nr_items) {
		WARN_ON(src->nr_items < 0);
		dst->nr_items += src->nr_items;
		set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
	}
	/* Mark the list_lru_one dead */
	src->nr_items = LONG_MIN;

	spin_unlock(&dst->lock);
	spin_unlock_irq(&src->lock);
}

void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent)
{
	struct list_lru *lru;
	int i;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &memcg_list_lrus, list) {
		struct list_lru_memcg *mlru;
		XA_STATE(xas, &lru->xa, memcg->kmemcg_id);

		/*
		 * Lock the Xarray to ensure that no list_lru_memcg allocation
		 * is ongoing and that any further allocation will see
		 * css_is_dying().
		 */
		xas_lock_irq(&xas);
		mlru = xas_store(&xas, NULL);
		xas_unlock_irq(&xas);
		if (!mlru)
			continue;

		/*
		 * With the Xarray value set to NULL, holding the lru lock
		 * below prevents list_lru_{add,del,isolate} from touching the
		 * lru, so it is safe to reparent.
		 */
		for_each_node(i)
			memcg_reparent_list_lru_one(lru, i, &mlru->node[i], parent);

		/*
		 * Here all list_lrus corresponding to the cgroup are guaranteed
		 * to remain empty, so we can safely free this lru; any further
		 * memcg_list_lru_alloc() call will simply bail out.
		 */
		kvfree_rcu(mlru, rcu);
	}
	mutex_unlock(&list_lrus_mutex);
}

static inline bool memcg_list_lru_allocated(struct mem_cgroup *memcg,
					    struct list_lru *lru)
{
	int idx = memcg->kmemcg_id;

	return idx < 0 || xa_load(&lru->xa, idx);
}

int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
			 gfp_t gfp)
{
	unsigned long flags;
	struct list_lru_memcg *mlru = NULL;
	struct mem_cgroup *pos, *parent;
	XA_STATE(xas, &lru->xa, 0);

	if (!list_lru_memcg_aware(lru) || memcg_list_lru_allocated(memcg, lru))
		return 0;

	gfp &= GFP_RECLAIM_MASK;
	/*
	 * Because the list_lru can be reparented to the parent cgroup's
	 * list_lru, we should make sure that this cgroup and all its
	 * ancestors have allocated list_lru_memcg.
	 */
	do {
		/*
		 * Keep finding the farthest parent that wasn't populated,
		 * until memcg itself is reached.
534 */ 535 pos = memcg; 536 parent = parent_mem_cgroup(pos); 537 while (!memcg_list_lru_allocated(parent, lru)) { 538 pos = parent; 539 parent = parent_mem_cgroup(pos); 540 } 541 542 if (!mlru) { 543 mlru = memcg_init_list_lru_one(lru, gfp); 544 if (!mlru) 545 return -ENOMEM; 546 } 547 xas_set(&xas, pos->kmemcg_id); 548 do { 549 xas_lock_irqsave(&xas, flags); 550 if (!xas_load(&xas) && !css_is_dying(&pos->css)) { 551 xas_store(&xas, mlru); 552 if (!xas_error(&xas)) 553 mlru = NULL; 554 } 555 xas_unlock_irqrestore(&xas, flags); 556 } while (xas_nomem(&xas, gfp)); 557 } while (pos != memcg && !css_is_dying(&pos->css)); 558 559 if (unlikely(mlru)) 560 kfree(mlru); 561 562 return xas_error(&xas); 563 } 564 #else 565 static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware) 566 { 567 } 568 569 static void memcg_destroy_list_lru(struct list_lru *lru) 570 { 571 } 572 #endif /* CONFIG_MEMCG */ 573 574 int __list_lru_init(struct list_lru *lru, bool memcg_aware, struct shrinker *shrinker) 575 { 576 int i; 577 578 #ifdef CONFIG_MEMCG 579 if (shrinker) 580 lru->shrinker_id = shrinker->id; 581 else 582 lru->shrinker_id = -1; 583 584 if (mem_cgroup_kmem_disabled()) 585 memcg_aware = false; 586 #endif 587 588 lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL); 589 if (!lru->node) 590 return -ENOMEM; 591 592 for_each_node(i) 593 init_one_lru(lru, &lru->node[i].lru); 594 595 memcg_init_list_lru(lru, memcg_aware); 596 list_lru_register(lru); 597 598 return 0; 599 } 600 EXPORT_SYMBOL_GPL(__list_lru_init); 601 602 void list_lru_destroy(struct list_lru *lru) 603 { 604 /* Already destroyed or not yet initialized? */ 605 if (!lru->node) 606 return; 607 608 list_lru_unregister(lru); 609 610 memcg_destroy_list_lru(lru); 611 kfree(lru->node); 612 lru->node = NULL; 613 614 #ifdef CONFIG_MEMCG 615 lru->shrinker_id = -1; 616 #endif 617 } 618 EXPORT_SYMBOL_GPL(list_lru_destroy); 619