xref: /linux/mm/list_lru.c (revision c34e9ab9a612ee8b18273398ef75c207b01f516d)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>
#include "slab.h"
#include "internal.h"

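/*
 * Rough usage sketch (illustrative only, not tied to any particular caller):
 * a cache embeds a struct list_lru, initializes it with list_lru_init() or
 * list_lru_init_memcg(), adds and removes objects with list_lru_add_obj() /
 * list_lru_del_obj(), reclaims from its shrinker via list_lru_shrink_count()
 * and list_lru_shrink_walk(), and tears everything down with
 * list_lru_destroy().  See <linux/list_lru.h> for the wrappers around the
 * helpers defined below.
 */
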
#ifdef CONFIG_MEMCG
static LIST_HEAD(memcg_list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return lru->memcg_aware;
}

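/*
 * Memcg-aware lrus are kept on a global list so that, when a cgroup is taken
 * offline, memcg_reparent_list_lrus() can find every lru that may still hold
 * objects charged to it and move them to the parent.
 */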
static void list_lru_register(struct list_lru *lru)
{
	if (!list_lru_memcg_aware(lru))
		return;

	mutex_lock(&list_lrus_mutex);
	list_add(&lru->list, &memcg_list_lrus);
	mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
	if (!list_lru_memcg_aware(lru))
		return;

	mutex_lock(&list_lrus_mutex);
	list_del(&lru->list);
	mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return lru->shrinker_id;
}

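/*
 * Map (lru, nid, memcg index) to the backing list_lru_one.  A negative index,
 * or a non memcg-aware lru, selects the global per-node list; otherwise look
 * up the per-memcg lists in the xarray, which may return NULL if they were
 * never allocated or have already been released by reparenting.
 */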
static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
	if (list_lru_memcg_aware(lru) && idx >= 0) {
		struct list_lru_memcg *mlru = xa_load(&lru->xa, idx);

		return mlru ? &mlru->node[nid] : NULL;
	}
	return &lru->node[nid].lru;
}

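/*
 * Find and lock the list_lru_one for @memcg on @nid.  A list that has been
 * marked dead by reparenting (nr_items == LONG_MIN) is either skipped when
 * @skip_empty is set, or the lookup is retried against the parent cgroup, so
 * callers that don't skip always get back a live, locked list.
 */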
static inline struct list_lru_one *
lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		       bool irq, bool skip_empty)
{
	struct list_lru_one *l;
	long nr_items;

	rcu_read_lock();
again:
	l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
	if (likely(l)) {
		if (irq)
			spin_lock_irq(&l->lock);
		else
			spin_lock(&l->lock);
		nr_items = READ_ONCE(l->nr_items);
		if (likely(nr_items != LONG_MIN)) {
			WARN_ON(nr_items < 0);
			rcu_read_unlock();
			return l;
		}
		if (irq)
			spin_unlock_irq(&l->lock);
		else
			spin_unlock(&l->lock);
	}
	/*
	 * Caller may simply bail out if raced with reparenting or
	 * may iterate through the list_lru and expect empty slots.
	 */
	if (skip_empty) {
		rcu_read_unlock();
		return NULL;
	}
	VM_WARN_ON(!css_is_dying(&memcg->css));
	memcg = parent_mem_cgroup(memcg);
	goto again;
}

static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
{
	if (irq_off)
		spin_unlock_irq(&l->lock);
	else
		spin_unlock(&l->lock);
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
	return &lru->node[nid].lru;
}

static inline struct list_lru_one *
lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		       bool irq, bool skip_empty)
{
	struct list_lru_one *l = &lru->node[nid].lru;

	if (irq)
		spin_lock_irq(&l->lock);
	else
		spin_lock(&l->lock);

	return l;
}

static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
{
	if (irq_off)
		spin_unlock_irq(&l->lock);
	else
		spin_unlock(&l->lock);
}
#endif /* CONFIG_MEMCG */

/* The caller must ensure the memcg lifetime. */
bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
		  struct mem_cgroup *memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
	if (!l)
		return false;
	if (list_empty(item)) {
		list_add_tail(item, &l->list);
		/* Set shrinker bit if the first element was added */
		if (!l->nr_items++)
			set_shrinker_bit(memcg, nid, lru_shrinker_id(lru));
		unlock_list_lru(l, false);
		atomic_long_inc(&nlru->nr_items);
		return true;
	}
	unlock_list_lru(l, false);
	return false;
}

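/*
 * Convenience wrapper for slab-backed objects: derive the node from the
 * object's page and, for memcg-aware lrus, file the item on the list of the
 * memcg that owns the object.  The rcu_read_lock() here is what satisfies
 * list_lru_add()'s memcg-lifetime requirement when the memcg is looked up on
 * the fly.
 */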
bool list_lru_add_obj(struct list_lru *lru, struct list_head *item)
{
	bool ret;
	int nid = page_to_nid(virt_to_page(item));

	if (list_lru_memcg_aware(lru)) {
		rcu_read_lock();
		ret = list_lru_add(lru, item, nid, mem_cgroup_from_slab_obj(item));
		rcu_read_unlock();
	} else {
		ret = list_lru_add(lru, item, nid, NULL);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(list_lru_add_obj);

/* The caller must ensure the memcg lifetime. */
bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid,
		  struct mem_cgroup *memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
	if (!l)
		return false;
	if (!list_empty(item)) {
		list_del_init(item);
		l->nr_items--;
		unlock_list_lru(l, false);
		atomic_long_dec(&nlru->nr_items);
		return true;
	}
	unlock_list_lru(l, false);
	return false;
}

bool list_lru_del_obj(struct list_lru *lru, struct list_head *item)
{
	bool ret;
	int nid = page_to_nid(virt_to_page(item));

	if (list_lru_memcg_aware(lru)) {
		rcu_read_lock();
		ret = list_lru_del(lru, item, nid, mem_cgroup_from_slab_obj(item));
		rcu_read_unlock();
	} else {
		ret = list_lru_del(lru, item, nid, NULL);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(list_lru_del_obj);

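/*
 * For use from walk callbacks only: the per-list lock is already held by the
 * walker, so just detach the item and adjust the per-list count.  The
 * per-node counter is updated by the walker once the callback returns
 * LRU_REMOVED or LRU_REMOVED_RETRY.
 */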
void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
	list_del_init(item);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head)
{
	list_move(item, head);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

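/*
 * Snapshot the per-memcg, per-node item count under RCU without taking the
 * list lock.  A list that is concurrently being reparented may report
 * LONG_MIN; negative values are clamped to zero below.
 */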
unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	struct list_lru_one *l;
	long count;

	rcu_read_lock();
	l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
	count = l ? READ_ONCE(l->nr_items) : 0;
	rcu_read_unlock();

	if (unlikely(count < 0))
		count = 0;

	return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
	struct list_lru_node *nlru;

	nlru = &lru->node[nid];
	return atomic_long_read(&nlru->nr_items);
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

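/*
 * Walk one per-memcg, per-node list and hand each item to @isolate with the
 * list lock held.  *nr_to_walk is decremented for every item looked at,
 * whether or not it is isolated.  Callbacks returning LRU_RETRY,
 * LRU_REMOVED_RETRY or LRU_STOP are expected to have dropped the lock,
 * forcing either a restart from scratch or an early exit.
 */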
static unsigned long
__list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		    list_lru_walk_cb isolate, void *cb_arg,
		    unsigned long *nr_to_walk, bool irq_off)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l = NULL;
	struct list_head *item, *n;
	unsigned long isolated = 0;

restart:
	l = lock_list_lru_of_memcg(lru, nid, memcg, irq_off, true);
	if (!l)
		return isolated;
	list_for_each_safe(item, n, &l->list) {
		enum lru_status ret;

		/*
		 * decrement nr_to_walk first so that we don't livelock if we
		 * get stuck on large numbers of LRU_RETRY items
		 */
		if (!*nr_to_walk)
			break;
		--*nr_to_walk;

		ret = isolate(item, l, cb_arg);
		switch (ret) {
		/*
		 * LRU_RETRY, LRU_REMOVED_RETRY and LRU_STOP will drop the lru
		 * lock. List traversal will have to restart from scratch.
		 */
		case LRU_RETRY:
			goto restart;
		case LRU_REMOVED_RETRY:
			fallthrough;
		case LRU_REMOVED:
			isolated++;
			atomic_long_dec(&nlru->nr_items);
			if (ret == LRU_REMOVED_RETRY)
				goto restart;
			break;
		case LRU_ROTATE:
			list_move_tail(item, &l->list);
			break;
		case LRU_SKIP:
			break;
		case LRU_STOP:
			goto out;
		default:
			BUG();
		}
	}
	unlock_list_lru(l, irq_off);
out:
	return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		  list_lru_walk_cb isolate, void *cb_arg,
		  unsigned long *nr_to_walk)
{
	return __list_lru_walk_one(lru, nid, memcg, isolate,
				   cb_arg, nr_to_walk, false);
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

unsigned long
list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		      list_lru_walk_cb isolate, void *cb_arg,
		      unsigned long *nr_to_walk)
{
	return __list_lru_walk_one(lru, nid, memcg, isolate,
				   cb_arg, nr_to_walk, true);
}

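/*
 * Walk everything on @nid: the global (non-memcg) list first, then the list
 * of every memcg that has one, pinning each memcg with mem_cgroup_tryget()
 * while its list is being walked, until *nr_to_walk is exhausted.
 */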
unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
				 list_lru_walk_cb isolate, void *cb_arg,
				 unsigned long *nr_to_walk)
{
	long isolated = 0;

	isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
				      nr_to_walk);

#ifdef CONFIG_MEMCG
	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
		struct list_lru_memcg *mlru;
		struct mem_cgroup *memcg;
		unsigned long index;

		xa_for_each(&lru->xa, index, mlru) {
			rcu_read_lock();
			memcg = mem_cgroup_from_id(index);
			if (!mem_cgroup_tryget(memcg)) {
				rcu_read_unlock();
				continue;
			}
			rcu_read_unlock();
			isolated += __list_lru_walk_one(lru, nid, memcg,
							isolate, cb_arg,
							nr_to_walk, false);
			mem_cgroup_put(memcg);

			if (*nr_to_walk <= 0)
				break;
		}
	}
#endif

	return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);

static void init_one_lru(struct list_lru *lru, struct list_lru_one *l)
{
	INIT_LIST_HEAD(&l->list);
	spin_lock_init(&l->lock);
	l->nr_items = 0;
#ifdef CONFIG_LOCKDEP
	if (lru->key)
		lockdep_set_class(&l->lock, lru->key);
#endif
}

#ifdef CONFIG_MEMCG
static struct list_lru_memcg *memcg_init_list_lru_one(struct list_lru *lru, gfp_t gfp)
{
	int nid;
	struct list_lru_memcg *mlru;

	mlru = kmalloc(struct_size(mlru, node, nr_node_ids), gfp);
	if (!mlru)
		return NULL;

	for_each_node(nid)
		init_one_lru(lru, &mlru->node[nid]);

	return mlru;
}

static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	if (memcg_aware)
		xa_init_flags(&lru->xa, XA_FLAGS_LOCK_IRQ);
	lru->memcg_aware = memcg_aware;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
	XA_STATE(xas, &lru->xa, 0);
	struct list_lru_memcg *mlru;

	if (!list_lru_memcg_aware(lru))
		return;

	xas_lock_irq(&xas);
	xas_for_each(&xas, mlru, ULONG_MAX) {
		kfree(mlru);
		xas_store(&xas, NULL);
	}
	xas_unlock_irq(&xas);
}

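/*
 * Splice everything on @src onto the destination cgroup's list under both
 * list locks, then mark @src dead (nr_items == LONG_MIN) so that concurrent
 * lockers in lock_list_lru_of_memcg() either bail out or retry against the
 * parent instead of using the now-empty list.
 */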
static void memcg_reparent_list_lru_one(struct list_lru *lru, int nid,
					struct list_lru_one *src,
					struct mem_cgroup *dst_memcg)
{
	int dst_idx = dst_memcg->kmemcg_id;
	struct list_lru_one *dst;

	spin_lock_irq(&src->lock);
	dst = list_lru_from_memcg_idx(lru, nid, dst_idx);
	spin_lock_nested(&dst->lock, SINGLE_DEPTH_NESTING);

	list_splice_init(&src->list, &dst->list);
	if (src->nr_items) {
		dst->nr_items += src->nr_items;
		set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
	}
	/* Mark the list_lru_one dead */
	src->nr_items = LONG_MIN;

	spin_unlock(&dst->lock);
	spin_unlock_irq(&src->lock);
}

void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent)
{
	struct list_lru *lru;
	int i;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &memcg_list_lrus, list) {
		struct list_lru_memcg *mlru;
		XA_STATE(xas, &lru->xa, memcg->kmemcg_id);

		/*
		 * Lock the xarray so that no list_lru_memcg allocation is
		 * still in flight; any further allocation will see
		 * css_is_dying() and bail out.
		 */
		xas_lock_irq(&xas);
		mlru = xas_store(&xas, NULL);
		xas_unlock_irq(&xas);
		if (!mlru)
			continue;

		/*
		 * With the xarray entry set to NULL, holding the lru lock
		 * below keeps list_lru_{add,del,isolate} from touching this
		 * lru, so it is safe to reparent.
		 */
		for_each_node(i)
			memcg_reparent_list_lru_one(lru, i, &mlru->node[i], parent);

		/*
		 * At this point all list_lrus corresponding to the cgroup are
		 * guaranteed to remain empty, so this one can be freed safely;
		 * any further memcg_list_lru_alloc() call will simply bail
		 * out.
		 */
		kvfree_rcu(mlru, rcu);
	}
	mutex_unlock(&list_lrus_mutex);
}

static inline bool memcg_list_lru_allocated(struct mem_cgroup *memcg,
					    struct list_lru *lru)
{
	int idx = memcg->kmemcg_id;

	return idx < 0 || xa_load(&lru->xa, idx);
}

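/*
 * Make sure @memcg and every ancestor that might later receive its objects
 * via reparenting have a list_lru_memcg allocated for @lru.  Allocation walks
 * from the farthest unpopulated ancestor down towards @memcg, so a child
 * never has a list while its parent does not.
 */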
int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
			 gfp_t gfp)
{
	unsigned long flags;
	struct list_lru_memcg *mlru;
	struct mem_cgroup *pos, *parent;
	XA_STATE(xas, &lru->xa, 0);

	if (!list_lru_memcg_aware(lru) || memcg_list_lru_allocated(memcg, lru))
		return 0;

	gfp &= GFP_RECLAIM_MASK;
	/*
	 * Because the list_lru can be reparented to the parent cgroup's
	 * list_lru, we should make sure that this cgroup and all its
	 * ancestors have allocated list_lru_memcg.
	 */
	do {
		/*
		 * Keep finding the farthest ancestor that has not been
		 * populated yet, until memcg itself is reached.
		 */
		pos = memcg;
		parent = parent_mem_cgroup(pos);
		while (!memcg_list_lru_allocated(parent, lru)) {
			pos = parent;
			parent = parent_mem_cgroup(pos);
		}

		mlru = memcg_init_list_lru_one(lru, gfp);
		if (!mlru)
			return -ENOMEM;
		xas_set(&xas, pos->kmemcg_id);
		do {
			xas_lock_irqsave(&xas, flags);
			if (!xas_load(&xas) && !css_is_dying(&pos->css)) {
				xas_store(&xas, mlru);
				if (!xas_error(&xas))
					mlru = NULL;
			}
			xas_unlock_irqrestore(&xas, flags);
		} while (xas_nomem(&xas, gfp));
		if (mlru)
			kfree(mlru);
	} while (pos != memcg && !css_is_dying(&pos->css));

	return xas_error(&xas);
}
#else
static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG */

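/*
 * Common initialisation behind the list_lru_init*() wrappers in
 * <linux/list_lru.h>: allocate the per-node lists, record the shrinker id
 * (if any) so set_shrinker_bit() can be used, and register memcg-aware lrus
 * for reparenting.  Memcg awareness is dropped when kernel memory accounting
 * is disabled.
 */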
int __list_lru_init(struct list_lru *lru, bool memcg_aware, struct shrinker *shrinker)
{
	int i;

#ifdef CONFIG_MEMCG
	if (shrinker)
		lru->shrinker_id = shrinker->id;
	else
		lru->shrinker_id = -1;

	if (mem_cgroup_kmem_disabled())
		memcg_aware = false;
#endif

	lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
	if (!lru->node)
		return -ENOMEM;

	for_each_node(i)
		init_one_lru(lru, &lru->node[i].lru);

	memcg_init_list_lru(lru, memcg_aware);
	list_lru_register(lru);

	return 0;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

void list_lru_destroy(struct list_lru *lru)
{
	/* Already destroyed or not yet initialized? */
	if (!lru->node)
		return;

	list_lru_unregister(lru);

	memcg_destroy_list_lru(lru);
	kfree(lru->node);
	lru->node = NULL;

#ifdef CONFIG_MEMCG
	lru->shrinker_id = -1;
#endif
}
EXPORT_SYMBOL_GPL(list_lru_destroy);