// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 */
#include "internal.h"

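/*
 * Global decompression scratch buffers: each buffer maps its pages into one
 * virtually contiguous area via vmap() and is protected by its own spinlock.
 * The pool below is shared by all mounted EROFS instances.
 */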
struct z_erofs_gbuf {
	spinlock_t lock;
	void *ptr;
	struct page **pages;
	unsigned int nrpages;
};

static struct z_erofs_gbuf *z_erofs_gbufpool, *z_erofs_rsvbuf;
static unsigned int z_erofs_gbuf_count, z_erofs_gbuf_nrpages,
		z_erofs_rsv_nrpages;

module_param_named(global_buffers, z_erofs_gbuf_count, uint, 0444);
module_param_named(reserved_pages, z_erofs_rsv_nrpages, uint, 0444);

static atomic_long_t erofs_global_shrink_cnt;	/* for all mounted instances */
/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);
static struct shrinker *erofs_shrinker_info;

static unsigned int z_erofs_gbuf_id(void)
{
	return raw_smp_processor_id() % z_erofs_gbuf_count;
}

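/*
 * Return the buffer slot keyed by the current CPU with its spinlock held;
 * the caller must pair this with z_erofs_put_gbuf().  NULL is returned
 * (and the lock dropped) if the buffer cannot hold @requiredpages pages.
 */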
void *z_erofs_get_gbuf(unsigned int requiredpages)
	__acquires(gbuf->lock)
{
	struct z_erofs_gbuf *gbuf;

	gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()];
	spin_lock(&gbuf->lock);
	/* check if the buffer is too small */
	if (requiredpages > gbuf->nrpages) {
		spin_unlock(&gbuf->lock);
		/* (for sparse checker) pretend gbuf->lock is still taken */
		__acquire(gbuf->lock);
		return NULL;
	}
	return gbuf->ptr;
}

void z_erofs_put_gbuf(void *ptr) __releases(gbuf->lock)
{
	struct z_erofs_gbuf *gbuf;

	gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()];
	DBG_BUGON(gbuf->ptr != ptr);
	spin_unlock(&gbuf->lock);
}

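/*
 * Grow every global buffer to @nrpages pages: reuse the pages already
 * attached, bulk-allocate the remainder, and swap in the new mapping under
 * the per-buffer spinlock.  Buffers are never shrunk here.
 */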
int z_erofs_gbuf_growsize(unsigned int nrpages)
{
	static DEFINE_MUTEX(gbuf_resize_mutex);
	struct page **tmp_pages = NULL;
	struct z_erofs_gbuf *gbuf;
	void *ptr, *old_ptr;
	int last, i, j;

	mutex_lock(&gbuf_resize_mutex);
	/*
	 * Avoid shrinking gbufs, since there is no telling how many
	 * filesystems still rely on the current size.
	 */
	if (nrpages <= z_erofs_gbuf_nrpages) {
		mutex_unlock(&gbuf_resize_mutex);
		return 0;
	}

	for (i = 0; i < z_erofs_gbuf_count; ++i) {
		gbuf = &z_erofs_gbufpool[i];
		tmp_pages = kcalloc(nrpages, sizeof(*tmp_pages), GFP_KERNEL);
		if (!tmp_pages)
			goto out;

		for (j = 0; j < gbuf->nrpages; ++j)
			tmp_pages[j] = gbuf->pages[j];
		do {
			last = j;
			j = alloc_pages_bulk_array(GFP_KERNEL, nrpages,
						   tmp_pages);
			if (last == j)
				goto out;
		} while (j != nrpages);

		ptr = vmap(tmp_pages, nrpages, VM_MAP, PAGE_KERNEL);
		if (!ptr)
			goto out;

		spin_lock(&gbuf->lock);
		kfree(gbuf->pages);
		gbuf->pages = tmp_pages;
		old_ptr = gbuf->ptr;
		gbuf->ptr = ptr;
		gbuf->nrpages = nrpages;
		spin_unlock(&gbuf->lock);
		if (old_ptr)
			vunmap(old_ptr);
	}
	z_erofs_gbuf_nrpages = nrpages;
out:
	if (i < z_erofs_gbuf_count && tmp_pages) {
		for (j = 0; j < nrpages; ++j)
			if (tmp_pages[j] && tmp_pages[j] != gbuf->pages[j])
				__free_page(tmp_pages[j]);
		kfree(tmp_pages);
	}
	mutex_unlock(&gbuf_resize_mutex);
	return i < z_erofs_gbuf_count ? -ENOMEM : 0;
}

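/*
 * Set up one global buffer per possible CPU (optionally capped by the
 * `global_buffers' module parameter), plus a trailing slot whose page
 * array, sized by `reserved_pages', acts as the reserved page pool.
 */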
int __init z_erofs_gbuf_init(void)
{
	unsigned int i, total = num_possible_cpus();

	if (z_erofs_gbuf_count)
		total = min(z_erofs_gbuf_count, total);
	z_erofs_gbuf_count = total;

	/* The last (special) global buffer is the reserved buffer */
	total += !!z_erofs_rsv_nrpages;

	z_erofs_gbufpool = kcalloc(total, sizeof(*z_erofs_gbufpool),
				   GFP_KERNEL);
	if (!z_erofs_gbufpool)
		return -ENOMEM;

	if (z_erofs_rsv_nrpages) {
		z_erofs_rsvbuf = &z_erofs_gbufpool[total - 1];
		z_erofs_rsvbuf->pages = kcalloc(z_erofs_rsv_nrpages,
				sizeof(*z_erofs_rsvbuf->pages), GFP_KERNEL);
		if (!z_erofs_rsvbuf->pages) {
			z_erofs_rsvbuf = NULL;
			z_erofs_rsv_nrpages = 0;
		}
	}
	for (i = 0; i < total; ++i)
		spin_lock_init(&z_erofs_gbufpool[i].lock);
	return 0;
}

void z_erofs_gbuf_exit(void)
{
	int i, j;

	for (i = 0; i < z_erofs_gbuf_count + (!!z_erofs_rsvbuf); ++i) {
		struct z_erofs_gbuf *gbuf = &z_erofs_gbufpool[i];

		if (gbuf->ptr) {
			vunmap(gbuf->ptr);
			gbuf->ptr = NULL;
		}

		if (!gbuf->pages)
			continue;

		for (j = 0; j < gbuf->nrpages; ++j)
			if (gbuf->pages[j])
				put_page(gbuf->pages[j]);
		kfree(gbuf->pages);
		gbuf->pages = NULL;
	}
	kfree(z_erofs_gbufpool);
}

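/*
 * Grab one page: first from the caller's local pagepool, then (if @tryrsv)
 * from the reserved global pool, and finally from the page allocator.
 */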
struct page *__erofs_allocpage(struct page **pagepool, gfp_t gfp, bool tryrsv)
{
	struct page *page = *pagepool;

	if (page) {
		*pagepool = (struct page *)page_private(page);
	} else if (tryrsv && z_erofs_rsvbuf && z_erofs_rsvbuf->nrpages) {
		spin_lock(&z_erofs_rsvbuf->lock);
		if (z_erofs_rsvbuf->nrpages)
			page = z_erofs_rsvbuf->pages[--z_erofs_rsvbuf->nrpages];
		spin_unlock(&z_erofs_rsvbuf->lock);
	}
	if (!page)
		page = alloc_page(gfp);
	DBG_BUGON(page && page_ref_count(page) != 1);
	return page;
}

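/*
 * Drain the caller's local pagepool: pages refill the reserved global pool
 * up to `reserved_pages' first, and any remaining pages are freed.
 */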
void erofs_release_pages(struct page **pagepool)
{
	while (*pagepool) {
		struct page *page = *pagepool;

		*pagepool = (struct page *)page_private(page);
		/* try to fill reserved global pool first */
		if (z_erofs_rsvbuf && z_erofs_rsvbuf->nrpages <
				z_erofs_rsv_nrpages) {
			spin_lock(&z_erofs_rsvbuf->lock);
			if (z_erofs_rsvbuf->nrpages < z_erofs_rsv_nrpages) {
				z_erofs_rsvbuf->pages[z_erofs_rsvbuf->nrpages++]
						= page;
				spin_unlock(&z_erofs_rsvbuf->lock);
				continue;
			}
			spin_unlock(&z_erofs_rsvbuf->lock);
		}
		put_page(page);
	}
}

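/*
 * Try to take a reference on a workgroup.  This fails only if the lockref
 * has already been marked dead; reviving a zero-count workgroup also
 * removes it from the global shrink count.
 */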
static bool erofs_workgroup_get(struct erofs_workgroup *grp)
{
	if (lockref_get_not_zero(&grp->lockref))
		return true;

	spin_lock(&grp->lockref.lock);
	if (__lockref_is_dead(&grp->lockref)) {
		spin_unlock(&grp->lockref.lock);
		return false;
	}

	if (!grp->lockref.count++)
		atomic_long_dec(&erofs_global_shrink_cnt);
	spin_unlock(&grp->lockref.lock);
	return true;
}

struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
					     pgoff_t index)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_workgroup *grp;

repeat:
	rcu_read_lock();
	grp = xa_load(&sbi->managed_pslots, index);
	if (grp) {
		if (!erofs_workgroup_get(grp)) {
			/* prefer to relax rcu read side */
			rcu_read_unlock();
			goto repeat;
		}

		DBG_BUGON(index != grp->index);
	}
	rcu_read_unlock();
	return grp;
}

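/*
 * Insert @grp into the managed XArray.  If another live workgroup already
 * owns the index, a reference to that one is taken and returned instead;
 * an ERR_PTR() is returned on XArray errors.
 */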
struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
					       struct erofs_workgroup *grp)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);
	struct erofs_workgroup *pre;

	DBG_BUGON(grp->lockref.count < 1);
repeat:
	xa_lock(&sbi->managed_pslots);
	pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
			   NULL, grp, GFP_KERNEL);
	if (pre) {
		if (xa_is_err(pre)) {
			pre = ERR_PTR(xa_err(pre));
		} else if (!erofs_workgroup_get(pre)) {
			/* try to legitimize the current in-tree one */
			xa_unlock(&sbi->managed_pslots);
			cond_resched();
			goto repeat;
		}
		grp = pre;
	}
	xa_unlock(&sbi->managed_pslots);
	return grp;
}

static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
	atomic_long_dec(&erofs_global_shrink_cnt);
	erofs_workgroup_free_rcu(grp);
}

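/*
 * Drop a reference.  A workgroup whose last reference goes away stays in
 * the XArray but becomes reclaimable, which is tracked via
 * erofs_global_shrink_cnt.
 */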
void erofs_workgroup_put(struct erofs_workgroup *grp)
{
	if (lockref_put_or_lock(&grp->lockref))
		return;

	DBG_BUGON(__lockref_is_dead(&grp->lockref));
	if (grp->lockref.count == 1)
		atomic_long_inc(&erofs_global_shrink_cnt);
	--grp->lockref.count;
	spin_unlock(&grp->lockref.lock);
}

static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp)
{
	int free = false;

	spin_lock(&grp->lockref.lock);
	if (grp->lockref.count)
		goto out;

	/*
	 * Note that all cached pages should be detached before being deleted
	 * from the XArray.  Otherwise, some cached pages could still be
	 * attached to the orphaned old workgroup when the new one is
	 * available in the tree.
	 */
	if (erofs_try_to_free_all_cached_folios(sbi, grp))
		goto out;

	/*
	 * It's impossible to fail after the workgroup is frozen; however, in
	 * order to avoid some race conditions, add a DBG_BUGON to observe
	 * this in advance.
	 */
	DBG_BUGON(__xa_erase(&sbi->managed_pslots, grp->index) != grp);

	lockref_mark_dead(&grp->lockref);
	free = true;
out:
	spin_unlock(&grp->lockref.lock);
	if (free)
		__erofs_workgroup_free(grp);
	return free;
}

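/*
 * Walk the managed XArray and release up to @nr_shrink unreferenced
 * workgroups, returning how many were actually freed.
 */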
static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
					      unsigned long nr_shrink)
{
	struct erofs_workgroup *grp;
	unsigned int freed = 0;
	unsigned long index;

	xa_lock(&sbi->managed_pslots);
	xa_for_each(&sbi->managed_pslots, index, grp) {
		/* try to shrink each valid workgroup */
		if (!erofs_try_to_release_workgroup(sbi, grp))
			continue;
		xa_unlock(&sbi->managed_pslots);

		++freed;
		if (!--nr_shrink)
			return freed;
		xa_lock(&sbi->managed_pslots);
	}
	xa_unlock(&sbi->managed_pslots);
	return freed;
}

void erofs_shrinker_register(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_init(&sbi->umount_mutex);

	spin_lock(&erofs_sb_list_lock);
	list_add(&sbi->list, &erofs_sb_list);
	spin_unlock(&erofs_sb_list_lock);
}

void erofs_shrinker_unregister(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	mutex_lock(&sbi->umount_mutex);
	/* clean up all remaining workgroups in memory */
	erofs_shrink_workstation(sbi, ~0UL);

	spin_lock(&erofs_sb_list_lock);
	list_del(&sbi->list);
	spin_unlock(&erofs_sb_list_lock);
	mutex_unlock(&sbi->umount_mutex);
}

static unsigned long erofs_shrink_count(struct shrinker *shrink,
					struct shrink_control *sc)
{
	return atomic_long_read(&erofs_global_shrink_cnt);
}

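/*
 * Walk the mounted superblocks round-robin and shrink each one under its
 * umount_mutex (skipping contended ones) until sc->nr_to_scan objects have
 * been freed or every superblock has been visited in this run.
 */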
static unsigned long erofs_shrink_scan(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct erofs_sb_info *sbi;
	struct list_head *p;

	unsigned long nr = sc->nr_to_scan;
	unsigned int run_no;
	unsigned long freed = 0;

	spin_lock(&erofs_sb_list_lock);
	do {
		run_no = ++shrinker_run_no;
	} while (run_no == 0);

	/* Iterate over all mounted superblocks and try to shrink them */
	p = erofs_sb_list.next;
	while (p != &erofs_sb_list) {
		sbi = list_entry(p, struct erofs_sb_info, list);

		/*
		 * We move the ones we do to the end of the list, so we stop
		 * when we see one we have already done.
		 */
		if (sbi->shrinker_run_no == run_no)
			break;

		if (!mutex_trylock(&sbi->umount_mutex)) {
			p = p->next;
			continue;
		}

		spin_unlock(&erofs_sb_list_lock);
		sbi->shrinker_run_no = run_no;

		freed += erofs_shrink_workstation(sbi, nr - freed);

		spin_lock(&erofs_sb_list_lock);
		/* Get the next list element before we move this one */
		p = p->next;

		/*
		 * Move this one to the end of the list to provide some
		 * fairness.
		 */
		list_move_tail(&sbi->list, &erofs_sb_list);
		mutex_unlock(&sbi->umount_mutex);

		if (freed >= nr)
			break;
	}
	spin_unlock(&erofs_sb_list_lock);
	return freed;
}

int __init erofs_init_shrinker(void)
{
	erofs_shrinker_info = shrinker_alloc(0, "erofs-shrinker");
	if (!erofs_shrinker_info)
		return -ENOMEM;

	erofs_shrinker_info->count_objects = erofs_shrink_count;
	erofs_shrinker_info->scan_objects = erofs_shrink_scan;
	shrinker_register(erofs_shrinker_info);
	return 0;
}

void erofs_exit_shrinker(void)
{
	shrinker_free(erofs_shrinker_info);
}