xref: /linux/mm/list_lru.c (revision 1623bc27a85a93e82194c8d077eccc464efa67db)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
4  * Authors: David Chinner and Glauber Costa
5  *
6  * Generic LRU infrastructure
7  */
8 #include <linux/kernel.h>
9 #include <linux/module.h>
10 #include <linux/mm.h>
11 #include <linux/list_lru.h>
12 #include <linux/slab.h>
13 #include <linux/mutex.h>
14 #include <linux/memcontrol.h>
15 #include "slab.h"
16 #include "internal.h"
17 
18 #ifdef CONFIG_MEMCG
19 static LIST_HEAD(memcg_list_lrus);
20 static DEFINE_MUTEX(list_lrus_mutex);
21 
22 static inline bool list_lru_memcg_aware(struct list_lru *lru)
23 {
24 	return lru->memcg_aware;
25 }
26 
27 static void list_lru_register(struct list_lru *lru)
28 {
29 	if (!list_lru_memcg_aware(lru))
30 		return;
31 
32 	mutex_lock(&list_lrus_mutex);
33 	list_add(&lru->list, &memcg_list_lrus);
34 	mutex_unlock(&list_lrus_mutex);
35 }
36 
37 static void list_lru_unregister(struct list_lru *lru)
38 {
39 	if (!list_lru_memcg_aware(lru))
40 		return;
41 
42 	mutex_lock(&list_lrus_mutex);
43 	list_del(&lru->list);
44 	mutex_unlock(&list_lrus_mutex);
45 }
46 
47 static int lru_shrinker_id(struct list_lru *lru)
48 {
49 	return lru->shrinker_id;
50 }
51 
52 static inline struct list_lru_one *
53 list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
54 {
55 	if (list_lru_memcg_aware(lru) && idx >= 0) {
56 		struct list_lru_memcg *mlru = xa_load(&lru->xa, idx);
57 
58 		return mlru ? &mlru->node[nid] : NULL;
59 	}
60 	return &lru->node[nid].lru;
61 }
62 
63 static inline struct list_lru_one *
64 lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
65 		       bool irq, bool skip_empty)
66 {
67 	struct list_lru_one *l;
68 	long nr_items;
69 
70 	rcu_read_lock();
71 again:
72 	l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
73 	if (likely(l)) {
74 		if (irq)
75 			spin_lock_irq(&l->lock);
76 		else
77 			spin_lock(&l->lock);
78 		nr_items = READ_ONCE(l->nr_items);
79 		if (likely(nr_items != LONG_MIN)) {
80 			rcu_read_unlock();
81 			return l;
82 		}
83 		if (irq)
84 			spin_unlock_irq(&l->lock);
85 		else
86 			spin_unlock(&l->lock);
87 	}
88 	/*
89 	 * Caller may simply bail out if raced with reparenting or
90 	 * may iterate through the list_lru and expect empty slots.
91 	 */
92 	if (skip_empty) {
93 		rcu_read_unlock();
94 		return NULL;
95 	}
96 	VM_WARN_ON(!css_is_dying(&memcg->css));
97 	memcg = parent_mem_cgroup(memcg);
98 	goto again;
99 }
100 
101 static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
102 {
103 	if (irq_off)
104 		spin_unlock_irq(&l->lock);
105 	else
106 		spin_unlock(&l->lock);
107 }
108 #else
109 static void list_lru_register(struct list_lru *lru)
110 {
111 }
112 
113 static void list_lru_unregister(struct list_lru *lru)
114 {
115 }
116 
117 static int lru_shrinker_id(struct list_lru *lru)
118 {
119 	return -1;
120 }
121 
122 static inline bool list_lru_memcg_aware(struct list_lru *lru)
123 {
124 	return false;
125 }
126 
127 static inline struct list_lru_one *
128 list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
129 {
130 	return &lru->node[nid].lru;
131 }
132 
133 static inline struct list_lru_one *
134 lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
135 		       bool irq, bool skip_empty)
136 {
137 	struct list_lru_one *l = &lru->node[nid].lru;
138 
139 	if (irq)
140 		spin_lock_irq(&l->lock);
141 	else
142 		spin_lock(&l->lock);
143 
144 	return l;
145 }
146 
147 static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
148 {
149 	if (irq_off)
150 		spin_unlock_irq(&l->lock);
151 	else
152 		spin_unlock(&l->lock);
153 }
154 #endif /* CONFIG_MEMCG */
155 
156 /* The caller must ensure the memcg lifetime. */
157 bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
158 		  struct mem_cgroup *memcg)
159 {
160 	struct list_lru_node *nlru = &lru->node[nid];
161 	struct list_lru_one *l;
162 
163 	l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
164 	if (!l)
165 		return false;
166 	if (list_empty(item)) {
167 		list_add_tail(item, &l->list);
168 		/* Set shrinker bit if the first element was added */
169 		if (!l->nr_items++)
170 			set_shrinker_bit(memcg, nid, lru_shrinker_id(lru));
171 		unlock_list_lru(l, false);
172 		atomic_long_inc(&nlru->nr_items);
173 		return true;
174 	}
175 	unlock_list_lru(l, false);
176 	return false;
177 }
178 
179 bool list_lru_add_obj(struct list_lru *lru, struct list_head *item)
180 {
181 	bool ret;
182 	int nid = page_to_nid(virt_to_page(item));
183 
184 	if (list_lru_memcg_aware(lru)) {
185 		rcu_read_lock();
186 		ret = list_lru_add(lru, item, nid, mem_cgroup_from_slab_obj(item));
187 		rcu_read_unlock();
188 	} else {
189 		ret = list_lru_add(lru, item, nid, NULL);
190 	}
191 
192 	return ret;
193 }
194 EXPORT_SYMBOL_GPL(list_lru_add_obj);
195 
196 /* The caller must ensure the memcg lifetime. */
197 bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid,
198 		  struct mem_cgroup *memcg)
199 {
200 	struct list_lru_node *nlru = &lru->node[nid];
201 	struct list_lru_one *l;
202 	l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
203 	if (!l)
204 		return false;
205 	if (!list_empty(item)) {
206 		list_del_init(item);
207 		l->nr_items--;
208 		unlock_list_lru(l, false);
209 		atomic_long_dec(&nlru->nr_items);
210 		return true;
211 	}
212 	unlock_list_lru(l, false);
213 	return false;
214 }
215 
216 bool list_lru_del_obj(struct list_lru *lru, struct list_head *item)
217 {
218 	bool ret;
219 	int nid = page_to_nid(virt_to_page(item));
220 
221 	if (list_lru_memcg_aware(lru)) {
222 		rcu_read_lock();
223 		ret = list_lru_del(lru, item, nid, mem_cgroup_from_slab_obj(item));
224 		rcu_read_unlock();
225 	} else {
226 		ret = list_lru_del(lru, item, nid, NULL);
227 	}
228 
229 	return ret;
230 }
231 EXPORT_SYMBOL_GPL(list_lru_del_obj);
232 
233 void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
234 {
235 	list_del_init(item);
236 	list->nr_items--;
237 }
238 EXPORT_SYMBOL_GPL(list_lru_isolate);
239 
240 void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
241 			   struct list_head *head)
242 {
243 	list_move(item, head);
244 	list->nr_items--;
245 }
246 EXPORT_SYMBOL_GPL(list_lru_isolate_move);
247 
248 unsigned long list_lru_count_one(struct list_lru *lru,
249 				 int nid, struct mem_cgroup *memcg)
250 {
251 	struct list_lru_one *l;
252 	long count;
253 
254 	rcu_read_lock();
255 	l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
256 	count = l ? READ_ONCE(l->nr_items) : 0;
257 	rcu_read_unlock();
258 
259 	if (unlikely(count < 0))
260 		count = 0;
261 
262 	return count;
263 }
264 EXPORT_SYMBOL_GPL(list_lru_count_one);
265 
266 unsigned long list_lru_count_node(struct list_lru *lru, int nid)
267 {
268 	struct list_lru_node *nlru;
269 
270 	nlru = &lru->node[nid];
271 	return atomic_long_read(&nlru->nr_items);
272 }
273 EXPORT_SYMBOL_GPL(list_lru_count_node);
274 
275 static unsigned long
276 __list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
277 		    list_lru_walk_cb isolate, void *cb_arg,
278 		    unsigned long *nr_to_walk, bool irq_off)
279 {
280 	struct list_lru_node *nlru = &lru->node[nid];
281 	struct list_lru_one *l = NULL;
282 	struct list_head *item, *n;
283 	unsigned long isolated = 0;
284 
285 restart:
286 	l = lock_list_lru_of_memcg(lru, nid, memcg, irq_off, true);
287 	if (!l)
288 		return isolated;
289 	list_for_each_safe(item, n, &l->list) {
290 		enum lru_status ret;
291 
292 		/*
293 		 * decrement nr_to_walk first so that we don't livelock if we
294 		 * get stuck on large numbers of LRU_RETRY items
295 		 */
296 		if (!*nr_to_walk)
297 			break;
298 		--*nr_to_walk;
299 
300 		ret = isolate(item, l, cb_arg);
301 		switch (ret) {
302 		/*
303 		 * LRU_RETRY, LRU_REMOVED_RETRY and LRU_STOP will drop the lru
304 		 * lock. List traversal will have to restart from scratch.
305 		 */
306 		case LRU_RETRY:
307 			goto restart;
308 		case LRU_REMOVED_RETRY:
309 			fallthrough;
310 		case LRU_REMOVED:
311 			isolated++;
312 			atomic_long_dec(&nlru->nr_items);
313 			if (ret == LRU_REMOVED_RETRY)
314 				goto restart;
315 			break;
316 		case LRU_ROTATE:
317 			list_move_tail(item, &l->list);
318 			break;
319 		case LRU_SKIP:
320 			break;
321 		case LRU_STOP:
322 			goto out;
323 		default:
324 			BUG();
325 		}
326 	}
327 	unlock_list_lru(l, irq_off);
328 out:
329 	return isolated;
330 }
331 
332 unsigned long
333 list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
334 		  list_lru_walk_cb isolate, void *cb_arg,
335 		  unsigned long *nr_to_walk)
336 {
337 	return __list_lru_walk_one(lru, nid, memcg, isolate,
338 				   cb_arg, nr_to_walk, false);
339 }
340 EXPORT_SYMBOL_GPL(list_lru_walk_one);
341 
342 unsigned long
343 list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
344 		      list_lru_walk_cb isolate, void *cb_arg,
345 		      unsigned long *nr_to_walk)
346 {
347 	return __list_lru_walk_one(lru, nid, memcg, isolate,
348 				   cb_arg, nr_to_walk, true);
349 }
350 
351 unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
352 				 list_lru_walk_cb isolate, void *cb_arg,
353 				 unsigned long *nr_to_walk)
354 {
355 	long isolated = 0;
356 
357 	isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
358 				      nr_to_walk);
359 
360 #ifdef CONFIG_MEMCG
361 	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
362 		struct list_lru_memcg *mlru;
363 		struct mem_cgroup *memcg;
364 		unsigned long index;
365 
366 		xa_for_each(&lru->xa, index, mlru) {
367 			rcu_read_lock();
368 			memcg = mem_cgroup_from_id(index);
369 			if (!mem_cgroup_tryget(memcg)) {
370 				rcu_read_unlock();
371 				continue;
372 			}
373 			rcu_read_unlock();
374 			isolated += __list_lru_walk_one(lru, nid, memcg,
375 							isolate, cb_arg,
376 							nr_to_walk, false);
377 			mem_cgroup_put(memcg);
378 
379 			if (*nr_to_walk <= 0)
380 				break;
381 		}
382 	}
383 #endif
384 
385 	return isolated;
386 }
387 EXPORT_SYMBOL_GPL(list_lru_walk_node);
388 
389 static void init_one_lru(struct list_lru *lru, struct list_lru_one *l)
390 {
391 	INIT_LIST_HEAD(&l->list);
392 	spin_lock_init(&l->lock);
393 	l->nr_items = 0;
394 #ifdef CONFIG_LOCKDEP
395 	if (lru->key)
396 		lockdep_set_class(&l->lock, lru->key);
397 #endif
398 }
399 
400 #ifdef CONFIG_MEMCG
401 static struct list_lru_memcg *memcg_init_list_lru_one(struct list_lru *lru, gfp_t gfp)
402 {
403 	int nid;
404 	struct list_lru_memcg *mlru;
405 
406 	mlru = kmalloc(struct_size(mlru, node, nr_node_ids), gfp);
407 	if (!mlru)
408 		return NULL;
409 
410 	for_each_node(nid)
411 		init_one_lru(lru, &mlru->node[nid]);
412 
413 	return mlru;
414 }
415 
416 static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
417 {
418 	if (memcg_aware)
419 		xa_init_flags(&lru->xa, XA_FLAGS_LOCK_IRQ);
420 	lru->memcg_aware = memcg_aware;
421 }
422 
423 static void memcg_destroy_list_lru(struct list_lru *lru)
424 {
425 	XA_STATE(xas, &lru->xa, 0);
426 	struct list_lru_memcg *mlru;
427 
428 	if (!list_lru_memcg_aware(lru))
429 		return;
430 
431 	xas_lock_irq(&xas);
432 	xas_for_each(&xas, mlru, ULONG_MAX) {
433 		kfree(mlru);
434 		xas_store(&xas, NULL);
435 	}
436 	xas_unlock_irq(&xas);
437 }
438 
439 static void memcg_reparent_list_lru_one(struct list_lru *lru, int nid,
440 					struct list_lru_one *src,
441 					struct mem_cgroup *dst_memcg)
442 {
443 	int dst_idx = dst_memcg->kmemcg_id;
444 	struct list_lru_one *dst;
445 
446 	spin_lock_irq(&src->lock);
447 	dst = list_lru_from_memcg_idx(lru, nid, dst_idx);
448 	spin_lock_nested(&dst->lock, SINGLE_DEPTH_NESTING);
449 
450 	list_splice_init(&src->list, &dst->list);
451 	if (src->nr_items) {
452 		WARN_ON(src->nr_items < 0);
453 		dst->nr_items += src->nr_items;
454 		set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
455 	}
456 	/* Mark the list_lru_one dead */
457 	src->nr_items = LONG_MIN;
458 
459 	spin_unlock(&dst->lock);
460 	spin_unlock_irq(&src->lock);
461 }
462 
463 void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent)
464 {
465 	struct list_lru *lru;
466 	int i;
467 
468 	mutex_lock(&list_lrus_mutex);
469 	list_for_each_entry(lru, &memcg_list_lrus, list) {
470 		struct list_lru_memcg *mlru;
471 		XA_STATE(xas, &lru->xa, memcg->kmemcg_id);
472 
473 		/*
474 		 * Lock the Xarray to ensure no on going list_lru_memcg
475 		 * allocation and further allocation will see css_is_dying().
476 		 */
477 		xas_lock_irq(&xas);
478 		mlru = xas_store(&xas, NULL);
479 		xas_unlock_irq(&xas);
480 		if (!mlru)
481 			continue;
482 
483 		/*
484 		 * With Xarray value set to NULL, holding the lru lock below
485 		 * prevents list_lru_{add,del,isolate} from touching the lru,
486 		 * safe to reparent.
487 		 */
488 		for_each_node(i)
489 			memcg_reparent_list_lru_one(lru, i, &mlru->node[i], parent);
490 
491 		/*
492 		 * Here all list_lrus corresponding to the cgroup are guaranteed
493 		 * to remain empty, we can safely free this lru, any further
494 		 * memcg_list_lru_alloc() call will simply bail out.
495 		 */
496 		kvfree_rcu(mlru, rcu);
497 	}
498 	mutex_unlock(&list_lrus_mutex);
499 }
500 
501 static inline bool memcg_list_lru_allocated(struct mem_cgroup *memcg,
502 					    struct list_lru *lru)
503 {
504 	int idx = memcg->kmemcg_id;
505 
506 	return idx < 0 || xa_load(&lru->xa, idx);
507 }
508 
509 int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
510 			 gfp_t gfp)
511 {
512 	unsigned long flags;
513 	struct list_lru_memcg *mlru;
514 	struct mem_cgroup *pos, *parent;
515 	XA_STATE(xas, &lru->xa, 0);
516 
517 	if (!list_lru_memcg_aware(lru) || memcg_list_lru_allocated(memcg, lru))
518 		return 0;
519 
520 	gfp &= GFP_RECLAIM_MASK;
521 	/*
522 	 * Because the list_lru can be reparented to the parent cgroup's
523 	 * list_lru, we should make sure that this cgroup and all its
524 	 * ancestors have allocated list_lru_memcg.
525 	 */
526 	do {
527 		/*
528 		 * Keep finding the farest parent that wasn't populated
529 		 * until found memcg itself.
530 		 */
531 		pos = memcg;
532 		parent = parent_mem_cgroup(pos);
533 		while (!memcg_list_lru_allocated(parent, lru)) {
534 			pos = parent;
535 			parent = parent_mem_cgroup(pos);
536 		}
537 
538 		mlru = memcg_init_list_lru_one(lru, gfp);
539 		if (!mlru)
540 			return -ENOMEM;
541 		xas_set(&xas, pos->kmemcg_id);
542 		do {
543 			xas_lock_irqsave(&xas, flags);
544 			if (!xas_load(&xas) && !css_is_dying(&pos->css)) {
545 				xas_store(&xas, mlru);
546 				if (!xas_error(&xas))
547 					mlru = NULL;
548 			}
549 			xas_unlock_irqrestore(&xas, flags);
550 		} while (xas_nomem(&xas, gfp));
551 		if (mlru)
552 			kfree(mlru);
553 	} while (pos != memcg && !css_is_dying(&pos->css));
554 
555 	return xas_error(&xas);
556 }
557 #else
558 static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
559 {
560 }
561 
562 static void memcg_destroy_list_lru(struct list_lru *lru)
563 {
564 }
565 #endif /* CONFIG_MEMCG */
566 
567 int __list_lru_init(struct list_lru *lru, bool memcg_aware, struct shrinker *shrinker)
568 {
569 	int i;
570 
571 #ifdef CONFIG_MEMCG
572 	if (shrinker)
573 		lru->shrinker_id = shrinker->id;
574 	else
575 		lru->shrinker_id = -1;
576 
577 	if (mem_cgroup_kmem_disabled())
578 		memcg_aware = false;
579 #endif
580 
581 	lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
582 	if (!lru->node)
583 		return -ENOMEM;
584 
585 	for_each_node(i)
586 		init_one_lru(lru, &lru->node[i].lru);
587 
588 	memcg_init_list_lru(lru, memcg_aware);
589 	list_lru_register(lru);
590 
591 	return 0;
592 }
593 EXPORT_SYMBOL_GPL(__list_lru_init);
594 
595 void list_lru_destroy(struct list_lru *lru)
596 {
597 	/* Already destroyed or not yet initialized? */
598 	if (!lru->node)
599 		return;
600 
601 	list_lru_unregister(lru);
602 
603 	memcg_destroy_list_lru(lru);
604 	kfree(lru->node);
605 	lru->node = NULL;
606 
607 #ifdef CONFIG_MEMCG
608 	lru->shrinker_id = -1;
609 #endif
610 }
611 EXPORT_SYMBOL_GPL(list_lru_destroy);
612