// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 */
#include "internal.h"

struct z_erofs_gbuf {
	spinlock_t lock;
	void *ptr;
	struct page **pages;
	unsigned int nrpages;
};

static struct z_erofs_gbuf *z_erofs_gbufpool, *z_erofs_rsvbuf;
static unsigned int z_erofs_gbuf_count, z_erofs_gbuf_nrpages,
		z_erofs_rsv_nrpages;

module_param_named(global_buffers, z_erofs_gbuf_count, uint, 0444);
module_param_named(reserved_pages, z_erofs_rsv_nrpages, uint, 0444);

static atomic_long_t erofs_global_shrink_cnt;	/* for all mounted instances */
/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);
static struct shrinker *erofs_shrinker_info;

static unsigned int z_erofs_gbuf_id(void)
{
	return raw_smp_processor_id() % z_erofs_gbuf_count;
}

void *z_erofs_get_gbuf(unsigned int requiredpages)
	__acquires(gbuf->lock)
{
	struct z_erofs_gbuf *gbuf;

	gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()];
	spin_lock(&gbuf->lock);
	/* check if the buffer is too small */
	if (requiredpages > gbuf->nrpages) {
		spin_unlock(&gbuf->lock);
		/* (for sparse checker) pretend gbuf->lock is still taken */
		__acquire(gbuf->lock);
		return NULL;
	}
	return gbuf->ptr;
}

void z_erofs_put_gbuf(void *ptr) __releases(gbuf->lock)
{
	struct z_erofs_gbuf *gbuf;

	gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()];
	DBG_BUGON(gbuf->ptr != ptr);
	spin_unlock(&gbuf->lock);
}

int z_erofs_gbuf_growsize(unsigned int nrpages)
{
	static DEFINE_MUTEX(gbuf_resize_mutex);
	struct page **tmp_pages = NULL;
	struct z_erofs_gbuf *gbuf;
	void *ptr, *old_ptr;
	int last, i, j;

	mutex_lock(&gbuf_resize_mutex);
	/* avoid shrinking gbufs, since no idea how many fses rely on */
	if (nrpages <= z_erofs_gbuf_nrpages) {
		mutex_unlock(&gbuf_resize_mutex);
		return 0;
	}

	for (i = 0; i < z_erofs_gbuf_count; ++i) {
		gbuf = &z_erofs_gbufpool[i];
		tmp_pages = kcalloc(nrpages, sizeof(*tmp_pages), GFP_KERNEL);
		if (!tmp_pages)
			goto out;

		for (j = 0; j < gbuf->nrpages; ++j)
			tmp_pages[j] = gbuf->pages[j];
		do {
			last = j;
			j = alloc_pages_bulk_array(GFP_KERNEL, nrpages,
						   tmp_pages);
			if (last == j)
				goto out;
		} while (j != nrpages);

		ptr = vmap(tmp_pages, nrpages, VM_MAP, PAGE_KERNEL);
		if (!ptr)
			goto out;

		spin_lock(&gbuf->lock);
		kfree(gbuf->pages);
		gbuf->pages = tmp_pages;
		old_ptr = gbuf->ptr;
		gbuf->ptr = ptr;
		gbuf->nrpages = nrpages;
		spin_unlock(&gbuf->lock);
		if (old_ptr)
			vunmap(old_ptr);
	}
	z_erofs_gbuf_nrpages = nrpages;
out:
	if (i < z_erofs_gbuf_count && tmp_pages) {
		for (j = 0; j < nrpages; ++j)
			if (tmp_pages[j] && tmp_pages[j] != gbuf->pages[j])
				__free_page(tmp_pages[j]);
		kfree(tmp_pages);
	}
	mutex_unlock(&gbuf_resize_mutex);
	return i < z_erofs_gbuf_count ? -ENOMEM : 0;
}

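/*
 * Set up one global buffer slot per possible CPU (capped by the
 * "global_buffers" module parameter), plus an optional trailing slot that
 * backs the reserved page pool when "reserved_pages" is non-zero.
 */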
int __init z_erofs_gbuf_init(void)
{
	unsigned int i, total = num_possible_cpus();

	if (z_erofs_gbuf_count)
		total = min(z_erofs_gbuf_count, total);
	z_erofs_gbuf_count = total;

	/* The last (special) global buffer is the reserved buffer */
	total += !!z_erofs_rsv_nrpages;

	z_erofs_gbufpool = kcalloc(total, sizeof(*z_erofs_gbufpool),
				   GFP_KERNEL);
	if (!z_erofs_gbufpool)
		return -ENOMEM;

	if (z_erofs_rsv_nrpages) {
		z_erofs_rsvbuf = &z_erofs_gbufpool[total - 1];
		z_erofs_rsvbuf->pages = kcalloc(z_erofs_rsv_nrpages,
				sizeof(*z_erofs_rsvbuf->pages), GFP_KERNEL);
		if (!z_erofs_rsvbuf->pages) {
			z_erofs_rsvbuf = NULL;
			z_erofs_rsv_nrpages = 0;
		}
	}
	for (i = 0; i < total; ++i)
		spin_lock_init(&z_erofs_gbufpool[i].lock);
	return 0;
}

void z_erofs_gbuf_exit(void)
{
	int i, j;

	for (i = 0; i < z_erofs_gbuf_count + (!!z_erofs_rsvbuf); ++i) {
		struct z_erofs_gbuf *gbuf = &z_erofs_gbufpool[i];

		if (gbuf->ptr) {
			vunmap(gbuf->ptr);
			gbuf->ptr = NULL;
		}

		if (!gbuf->pages)
			continue;

		for (j = 0; j < gbuf->nrpages; ++j)
			if (gbuf->pages[j])
				put_page(gbuf->pages[j]);
		kfree(gbuf->pages);
		gbuf->pages = NULL;
	}
	kfree(z_erofs_gbufpool);
}

struct page *__erofs_allocpage(struct page **pagepool, gfp_t gfp, bool tryrsv)
{
	struct page *page = *pagepool;

	if (page) {
		*pagepool = (struct page *)page_private(page);
	} else if (tryrsv && z_erofs_rsvbuf && z_erofs_rsvbuf->nrpages) {
		spin_lock(&z_erofs_rsvbuf->lock);
		if (z_erofs_rsvbuf->nrpages)
			page = z_erofs_rsvbuf->pages[--z_erofs_rsvbuf->nrpages];
		spin_unlock(&z_erofs_rsvbuf->lock);
	}
	if (!page)
		page = alloc_page(gfp);
	DBG_BUGON(page && page_ref_count(page) != 1);
	return page;
}

void erofs_release_pages(struct page **pagepool)
{
	while (*pagepool) {
		struct page *page = *pagepool;

		*pagepool = (struct page *)page_private(page);
		/* try to fill the reserved global pool first */
		if (z_erofs_rsvbuf && z_erofs_rsvbuf->nrpages <
				z_erofs_rsv_nrpages) {
			spin_lock(&z_erofs_rsvbuf->lock);
			if (z_erofs_rsvbuf->nrpages < z_erofs_rsv_nrpages) {
				z_erofs_rsvbuf->pages[z_erofs_rsvbuf->nrpages++]
						= page;
				spin_unlock(&z_erofs_rsvbuf->lock);
				continue;
			}
			spin_unlock(&z_erofs_rsvbuf->lock);
		}
		put_page(page);
	}
}

static bool erofs_workgroup_get(struct erofs_workgroup *grp)
{
	if (lockref_get_not_zero(&grp->lockref))
		return true;

	spin_lock(&grp->lockref.lock);
	if (__lockref_is_dead(&grp->lockref)) {
		spin_unlock(&grp->lockref.lock);
		return false;
	}

	if (!grp->lockref.count++)
		atomic_long_dec(&erofs_global_shrink_cnt);
	spin_unlock(&grp->lockref.lock);
	return true;
}

struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
					     pgoff_t index)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_workgroup *grp;

repeat:
	rcu_read_lock();
	grp = xa_load(&sbi->managed_pslots, index);
	if (grp) {
		if (!erofs_workgroup_get(grp)) {
			/* prefer to relax the rcu read side */
			rcu_read_unlock();
			goto repeat;
		}

		DBG_BUGON(index != grp->index);
	}
	rcu_read_unlock();
	return grp;
}

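/*
 * Try to insert @grp into the managed XArray at @grp->index.  If a live
 * workgroup already occupies that slot, take a reference on it and return
 * it instead of @grp; if the in-tree one is already dying, back off and
 * retry until the slot can be claimed.
 */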
struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
					       struct erofs_workgroup *grp)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);
	struct erofs_workgroup *pre;

	DBG_BUGON(grp->lockref.count < 1);
repeat:
	xa_lock(&sbi->managed_pslots);
	pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
			   NULL, grp, GFP_KERNEL);
	if (pre) {
		if (xa_is_err(pre)) {
			pre = ERR_PTR(xa_err(pre));
		} else if (!erofs_workgroup_get(pre)) {
			/* try to legitimize the current in-tree one */
			xa_unlock(&sbi->managed_pslots);
			cond_resched();
			goto repeat;
		}
		grp = pre;
	}
	xa_unlock(&sbi->managed_pslots);
	return grp;
}

static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
	atomic_long_dec(&erofs_global_shrink_cnt);
	erofs_workgroup_free_rcu(grp);
}

void erofs_workgroup_put(struct erofs_workgroup *grp)
{
	if (lockref_put_or_lock(&grp->lockref))
		return;

	DBG_BUGON(__lockref_is_dead(&grp->lockref));
	if (grp->lockref.count == 1)
		atomic_long_inc(&erofs_global_shrink_cnt);
	--grp->lockref.count;
	spin_unlock(&grp->lockref.lock);
}

static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp)
{
	int free = false;

	spin_lock(&grp->lockref.lock);
	if (grp->lockref.count)
		goto out;

	/*
	 * Note that all cached folios must be detached before the workgroup
	 * is deleted from the XArray.  Otherwise, some cached folios could
	 * remain attached to the orphaned old workgroup when the new one
	 * becomes available in the tree.
	 */
	if (erofs_try_to_free_all_cached_folios(sbi, grp))
		goto out;

	/*
	 * It's impossible to fail after the workgroup is frozen, however
	 * in order to avoid some race conditions, add a DBG_BUGON to
	 * observe this in advance.
	 */
	DBG_BUGON(__xa_erase(&sbi->managed_pslots, grp->index) != grp);

	lockref_mark_dead(&grp->lockref);
	free = true;
out:
	spin_unlock(&grp->lockref.lock);
	if (free)
		__erofs_workgroup_free(grp);
	return free;
}

static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
					      unsigned long nr_shrink)
{
	struct erofs_workgroup *grp;
	unsigned int freed = 0;
	unsigned long index;

	xa_lock(&sbi->managed_pslots);
	xa_for_each(&sbi->managed_pslots, index, grp) {
		/* try to shrink each valid workgroup */
		if (!erofs_try_to_release_workgroup(sbi, grp))
			continue;
		xa_unlock(&sbi->managed_pslots);

		++freed;
		if (!--nr_shrink)
			return freed;
		xa_lock(&sbi->managed_pslots);
	}
	xa_unlock(&sbi->managed_pslots);
	return freed;
}

void erofs_shrinker_register(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_init(&sbi->umount_mutex);

	spin_lock(&erofs_sb_list_lock);
	list_add(&sbi->list, &erofs_sb_list);
	spin_unlock(&erofs_sb_list_lock);
}

void erofs_shrinker_unregister(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	mutex_lock(&sbi->umount_mutex);
	/* clean up all remaining workgroups in memory */
	erofs_shrink_workstation(sbi, ~0UL);

	spin_lock(&erofs_sb_list_lock);
	list_del(&sbi->list);
	spin_unlock(&erofs_sb_list_lock);
	mutex_unlock(&sbi->umount_mutex);
}

static unsigned long erofs_shrink_count(struct shrinker *shrink,
					struct shrink_control *sc)
{
	return atomic_long_read(&erofs_global_shrink_cnt);
}

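/*
 * Walk the mounted superblocks round-robin: each sbi that can be locked is
 * tagged with the current run number, shrunk, and rotated to the tail of
 * erofs_sb_list for fairness.  The walk stops once an sbi from this run is
 * seen again or sc->nr_to_scan objects have been freed.
 */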
static unsigned long erofs_shrink_scan(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct erofs_sb_info *sbi;
	struct list_head *p;

	unsigned long nr = sc->nr_to_scan;
	unsigned int run_no;
	unsigned long freed = 0;

	spin_lock(&erofs_sb_list_lock);
	do {
		run_no = ++shrinker_run_no;
	} while (run_no == 0);

	/* Iterate over all mounted superblocks and try to shrink them */
	p = erofs_sb_list.next;
	while (p != &erofs_sb_list) {
		sbi = list_entry(p, struct erofs_sb_info, list);

		/*
		 * We move the ones we do to the end of the list, so we stop
		 * when we see one we have already done.
		 */
		if (sbi->shrinker_run_no == run_no)
			break;

		if (!mutex_trylock(&sbi->umount_mutex)) {
			p = p->next;
			continue;
		}

		spin_unlock(&erofs_sb_list_lock);
		sbi->shrinker_run_no = run_no;

		freed += erofs_shrink_workstation(sbi, nr - freed);

		spin_lock(&erofs_sb_list_lock);
		/* Get the next list element before we move this one */
		p = p->next;

		/*
		 * Move this one to the end of the list to provide some
		 * fairness.
		 */
		list_move_tail(&sbi->list, &erofs_sb_list);
		mutex_unlock(&sbi->umount_mutex);

		if (freed >= nr)
			break;
	}
	spin_unlock(&erofs_sb_list_lock);
	return freed;
}

int __init erofs_init_shrinker(void)
{
	erofs_shrinker_info = shrinker_alloc(0, "erofs-shrinker");
	if (!erofs_shrinker_info)
		return -ENOMEM;

	erofs_shrinker_info->count_objects = erofs_shrink_count;
	erofs_shrinker_info->scan_objects = erofs_shrink_scan;
	shrinker_register(erofs_shrinker_info);
	return 0;
}

void erofs_exit_shrinker(void)
{
	shrinker_free(erofs_shrinker_info);
}