// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 */
#include "internal.h"

struct z_erofs_gbuf {
	spinlock_t lock;
	void *ptr;
	struct page **pages;
	unsigned int nrpages;
};

static struct z_erofs_gbuf *z_erofs_gbufpool, *z_erofs_rsvbuf;
static unsigned int z_erofs_gbuf_count, z_erofs_gbuf_nrpages,
		z_erofs_rsv_nrpages;

module_param_named(global_buffers, z_erofs_gbuf_count, uint, 0444);
module_param_named(reserved_pages, z_erofs_rsv_nrpages, uint, 0444);

static atomic_long_t erofs_global_shrink_cnt;	/* for all mounted instances */
/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);
static struct shrinker *erofs_shrinker_info;

static unsigned int z_erofs_gbuf_id(void)
{
	return raw_smp_processor_id() % z_erofs_gbuf_count;
}

void *z_erofs_get_gbuf(unsigned int requiredpages)
	__acquires(gbuf->lock)
{
	struct z_erofs_gbuf *gbuf;

	migrate_disable();
	gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()];
	spin_lock(&gbuf->lock);
	/* check if the buffer is too small */
	if (requiredpages > gbuf->nrpages) {
		spin_unlock(&gbuf->lock);
		migrate_enable();
		/* (for sparse checker) pretend gbuf->lock is still taken */
		__acquire(gbuf->lock);
		return NULL;
	}
	return gbuf->ptr;
}

void z_erofs_put_gbuf(void *ptr) __releases(gbuf->lock)
{
	struct z_erofs_gbuf *gbuf;

	gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()];
	DBG_BUGON(gbuf->ptr != ptr);
	spin_unlock(&gbuf->lock);
	migrate_enable();
}

int z_erofs_gbuf_growsize(unsigned int nrpages)
{
	static DEFINE_MUTEX(gbuf_resize_mutex);
	struct page **tmp_pages = NULL;
	struct z_erofs_gbuf *gbuf;
	void *ptr, *old_ptr;
	int last, i, j;

	mutex_lock(&gbuf_resize_mutex);
	/* avoid shrinking gbufs, since we don't know how many fses rely on them */
	if (nrpages <= z_erofs_gbuf_nrpages) {
		mutex_unlock(&gbuf_resize_mutex);
		return 0;
	}

	for (i = 0; i < z_erofs_gbuf_count; ++i) {
		gbuf = &z_erofs_gbufpool[i];
		tmp_pages = kcalloc(nrpages, sizeof(*tmp_pages), GFP_KERNEL);
		if (!tmp_pages)
			goto out;

		for (j = 0; j < gbuf->nrpages; ++j)
			tmp_pages[j] = gbuf->pages[j];
		do {
			last = j;
			j = alloc_pages_bulk_array(GFP_KERNEL, nrpages,
						   tmp_pages);
			if (last == j)
				goto out;
		} while (j != nrpages);

		ptr = vmap(tmp_pages, nrpages, VM_MAP, PAGE_KERNEL);
		if (!ptr)
			goto out;

		spin_lock(&gbuf->lock);
		kfree(gbuf->pages);
		gbuf->pages = tmp_pages;
		old_ptr = gbuf->ptr;
		gbuf->ptr = ptr;
		gbuf->nrpages = nrpages;
		spin_unlock(&gbuf->lock);
		if (old_ptr)
			vunmap(old_ptr);
	}
	z_erofs_gbuf_nrpages = nrpages;
out:
	if (i < z_erofs_gbuf_count && tmp_pages) {
		for (j = 0; j < nrpages; ++j)
			if (tmp_pages[j] && (j >= gbuf->nrpages ||
					     tmp_pages[j] != gbuf->pages[j]))
				__free_page(tmp_pages[j]);
		kfree(tmp_pages);
	}
	mutex_unlock(&gbuf_resize_mutex);
	return i < z_erofs_gbuf_count ? -ENOMEM : 0;
}

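/*
 * Set up the global buffer pool: one buffer per possible CPU by default
 * (optionally capped by the "global_buffers" module parameter), plus one
 * trailing reserved buffer when "reserved_pages" is non-zero.
 */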
int __init z_erofs_gbuf_init(void)
{
	unsigned int i, total = num_possible_cpus();

	if (z_erofs_gbuf_count)
		total = min(z_erofs_gbuf_count, total);
	z_erofs_gbuf_count = total;

	/* The last (special) global buffer is the reserved buffer */
	total += !!z_erofs_rsv_nrpages;

	z_erofs_gbufpool = kcalloc(total, sizeof(*z_erofs_gbufpool),
				   GFP_KERNEL);
	if (!z_erofs_gbufpool)
		return -ENOMEM;

	if (z_erofs_rsv_nrpages) {
		z_erofs_rsvbuf = &z_erofs_gbufpool[total - 1];
		z_erofs_rsvbuf->pages = kcalloc(z_erofs_rsv_nrpages,
				sizeof(*z_erofs_rsvbuf->pages), GFP_KERNEL);
		if (!z_erofs_rsvbuf->pages) {
			z_erofs_rsvbuf = NULL;
			z_erofs_rsv_nrpages = 0;
		}
	}
	for (i = 0; i < total; ++i)
		spin_lock_init(&z_erofs_gbufpool[i].lock);
	return 0;
}

void z_erofs_gbuf_exit(void)
{
	int i, j;

	for (i = 0; i < z_erofs_gbuf_count + (!!z_erofs_rsvbuf); ++i) {
		struct z_erofs_gbuf *gbuf = &z_erofs_gbufpool[i];

		if (gbuf->ptr) {
			vunmap(gbuf->ptr);
			gbuf->ptr = NULL;
		}

		if (!gbuf->pages)
			continue;

		for (j = 0; j < gbuf->nrpages; ++j)
			if (gbuf->pages[j])
				put_page(gbuf->pages[j]);
		kfree(gbuf->pages);
		gbuf->pages = NULL;
	}
	kfree(z_erofs_gbufpool);
}

struct page *__erofs_allocpage(struct page **pagepool, gfp_t gfp, bool tryrsv)
{
	struct page *page = *pagepool;

	if (page) {
		*pagepool = (struct page *)page_private(page);
	} else if (tryrsv && z_erofs_rsvbuf && z_erofs_rsvbuf->nrpages) {
		spin_lock(&z_erofs_rsvbuf->lock);
		if (z_erofs_rsvbuf->nrpages)
			page = z_erofs_rsvbuf->pages[--z_erofs_rsvbuf->nrpages];
		spin_unlock(&z_erofs_rsvbuf->lock);
	}
	if (!page)
		page = alloc_page(gfp);
	DBG_BUGON(page && page_ref_count(page) != 1);
	return page;
}

void erofs_release_pages(struct page **pagepool)
{
	while (*pagepool) {
		struct page *page = *pagepool;

		*pagepool = (struct page *)page_private(page);
		/* try to fill reserved global pool first */
		if (z_erofs_rsvbuf && z_erofs_rsvbuf->nrpages <
				z_erofs_rsv_nrpages) {
			spin_lock(&z_erofs_rsvbuf->lock);
			if (z_erofs_rsvbuf->nrpages < z_erofs_rsv_nrpages) {
				z_erofs_rsvbuf->pages[z_erofs_rsvbuf->nrpages++]
						= page;
				spin_unlock(&z_erofs_rsvbuf->lock);
				continue;
			}
			spin_unlock(&z_erofs_rsvbuf->lock);
		}
		put_page(page);
	}
}

static bool erofs_workgroup_get(struct erofs_workgroup *grp)
{
	if (lockref_get_not_zero(&grp->lockref))
		return true;

	spin_lock(&grp->lockref.lock);
	if (__lockref_is_dead(&grp->lockref)) {
		spin_unlock(&grp->lockref.lock);
		return false;
	}

	if (!grp->lockref.count++)
		atomic_long_dec(&erofs_global_shrink_cnt);
	spin_unlock(&grp->lockref.lock);
	return true;
}

struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
					     pgoff_t index)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_workgroup *grp;

repeat:
	rcu_read_lock();
	grp = xa_load(&sbi->managed_pslots, index);
	if (grp) {
		if (!erofs_workgroup_get(grp)) {
			/* prefer to relax rcu read side */
			rcu_read_unlock();
			goto repeat;
		}

		DBG_BUGON(index != grp->index);
	}
	rcu_read_unlock();
	return grp;
}

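/*
 * Try to publish @grp in the per-sb managed_pslots XArray.  If a workgroup
 * with the same index is already present and can still be grabbed, that
 * pre-existing one is returned instead; an XArray error is returned as an
 * ERR_PTR().
 */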
struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
					       struct erofs_workgroup *grp)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);
	struct erofs_workgroup *pre;

	DBG_BUGON(grp->lockref.count < 1);
repeat:
	xa_lock(&sbi->managed_pslots);
	pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
			   NULL, grp, GFP_KERNEL);
	if (pre) {
		if (xa_is_err(pre)) {
			pre = ERR_PTR(xa_err(pre));
		} else if (!erofs_workgroup_get(pre)) {
			/* try to legitimize the current in-tree one */
			xa_unlock(&sbi->managed_pslots);
			cond_resched();
			goto repeat;
		}
		grp = pre;
	}
	xa_unlock(&sbi->managed_pslots);
	return grp;
}

static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
	atomic_long_dec(&erofs_global_shrink_cnt);
	erofs_workgroup_free_rcu(grp);
}

void erofs_workgroup_put(struct erofs_workgroup *grp)
{
	if (lockref_put_or_lock(&grp->lockref))
		return;

	DBG_BUGON(__lockref_is_dead(&grp->lockref));
	if (grp->lockref.count == 1)
		atomic_long_inc(&erofs_global_shrink_cnt);
	--grp->lockref.count;
	spin_unlock(&grp->lockref.lock);
}

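/*
 * Called by the shrinker path with sbi->managed_pslots xa_lock held: a
 * workgroup is released only if it has no remaining references and all
 * of its cached folios can be dropped, in which case it is erased from
 * the XArray, marked dead and freed.
 */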
static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp)
{
	int free = false;

	spin_lock(&grp->lockref.lock);
	if (grp->lockref.count)
		goto out;

	/*
	 * Note that all cached pages should be detached before deleted from
	 * the XArray. Otherwise some cached pages could be still attached to
	 * the orphan old workgroup when the new one is available in the tree.
	 */
	if (erofs_try_to_free_all_cached_folios(sbi, grp))
		goto out;

	/*
	 * It's impossible to fail after the workgroup is frozen; however,
	 * in order to avoid some race conditions, add a DBG_BUGON to
	 * observe this in advance.
	 */
	DBG_BUGON(__xa_erase(&sbi->managed_pslots, grp->index) != grp);

	lockref_mark_dead(&grp->lockref);
	free = true;
out:
	spin_unlock(&grp->lockref.lock);
	if (free)
		__erofs_workgroup_free(grp);
	return free;
}

static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
					      unsigned long nr_shrink)
{
	struct erofs_workgroup *grp;
	unsigned int freed = 0;
	unsigned long index;

	xa_lock(&sbi->managed_pslots);
	xa_for_each(&sbi->managed_pslots, index, grp) {
		/* try to shrink each valid workgroup */
		if (!erofs_try_to_release_workgroup(sbi, grp))
			continue;
		xa_unlock(&sbi->managed_pslots);

		++freed;
		if (!--nr_shrink)
			return freed;
		xa_lock(&sbi->managed_pslots);
	}
	xa_unlock(&sbi->managed_pslots);
	return freed;
}

void erofs_shrinker_register(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_init(&sbi->umount_mutex);

	spin_lock(&erofs_sb_list_lock);
	list_add(&sbi->list, &erofs_sb_list);
	spin_unlock(&erofs_sb_list_lock);
}

void erofs_shrinker_unregister(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	mutex_lock(&sbi->umount_mutex);
	/* clean up all remaining workgroups in memory */
	erofs_shrink_workstation(sbi, ~0UL);

	spin_lock(&erofs_sb_list_lock);
	list_del(&sbi->list);
	spin_unlock(&erofs_sb_list_lock);
	mutex_unlock(&sbi->umount_mutex);
}

static unsigned long erofs_shrink_count(struct shrinker *shrink,
					struct shrink_control *sc)
{
	return atomic_long_read(&erofs_global_shrink_cnt);
}

static unsigned long erofs_shrink_scan(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct erofs_sb_info *sbi;
	struct list_head *p;

	unsigned long nr = sc->nr_to_scan;
	unsigned int run_no;
	unsigned long freed = 0;

	spin_lock(&erofs_sb_list_lock);
	do {
		run_no = ++shrinker_run_no;
	} while (run_no == 0);

	/* Iterate over all mounted superblocks and try to shrink them */
	p = erofs_sb_list.next;
	while (p != &erofs_sb_list) {
		sbi = list_entry(p, struct erofs_sb_info, list);

		/*
		 * We move the ones we do to the end of the list, so we stop
		 * when we see one we have already done.
		 */
		if (sbi->shrinker_run_no == run_no)
			break;

		if (!mutex_trylock(&sbi->umount_mutex)) {
			p = p->next;
			continue;
		}

		spin_unlock(&erofs_sb_list_lock);
		sbi->shrinker_run_no = run_no;

		freed += erofs_shrink_workstation(sbi, nr - freed);

		spin_lock(&erofs_sb_list_lock);
		/* Get the next list element before we move this one */
		p = p->next;

		/*
		 * Move this one to the end of the list to provide some
		 * fairness.
		 */
		list_move_tail(&sbi->list, &erofs_sb_list);
		mutex_unlock(&sbi->umount_mutex);

		if (freed >= nr)
			break;
	}
	spin_unlock(&erofs_sb_list_lock);
	return freed;
}

int __init erofs_init_shrinker(void)
{
	erofs_shrinker_info = shrinker_alloc(0, "erofs-shrinker");
	if (!erofs_shrinker_info)
		return -ENOMEM;

	erofs_shrinker_info->count_objects = erofs_shrink_count;
	erofs_shrinker_info->scan_objects = erofs_shrink_scan;
	shrinker_register(erofs_shrinker_info);
	return 0;
}

void erofs_exit_shrinker(void)
{
	shrinker_free(erofs_shrinker_info);
}