// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 */
#include "internal.h"

struct z_erofs_gbuf {
	spinlock_t lock;
	void *ptr;
	struct page **pages;
	unsigned int nrpages;
};

static struct z_erofs_gbuf *z_erofs_gbufpool, *z_erofs_rsvbuf;
static unsigned int z_erofs_gbuf_count, z_erofs_gbuf_nrpages,
		z_erofs_rsv_nrpages;

module_param_named(global_buffers, z_erofs_gbuf_count, uint, 0444);
module_param_named(reserved_pages, z_erofs_rsv_nrpages, uint, 0444);

static atomic_long_t erofs_global_shrink_cnt;	/* for all mounted instances */
/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);
static struct shrinker *erofs_shrinker_info;

static unsigned int z_erofs_gbuf_id(void)
{
	return raw_smp_processor_id() % z_erofs_gbuf_count;
}

void *z_erofs_get_gbuf(unsigned int requiredpages)
	__acquires(gbuf->lock)
{
	struct z_erofs_gbuf *gbuf;

	migrate_disable();
	gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()];
	spin_lock(&gbuf->lock);
	/* check if the buffer is too small */
	if (requiredpages > gbuf->nrpages) {
		spin_unlock(&gbuf->lock);
		migrate_enable();
		/* (for sparse checker) pretend gbuf->lock is still taken */
		__acquire(gbuf->lock);
		return NULL;
	}
	return gbuf->ptr;
}

void z_erofs_put_gbuf(void *ptr) __releases(gbuf->lock)
{
	struct z_erofs_gbuf *gbuf;

	gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()];
	DBG_BUGON(gbuf->ptr != ptr);
	spin_unlock(&gbuf->lock);
	migrate_enable();
}

int z_erofs_gbuf_growsize(unsigned int nrpages)
{
	static DEFINE_MUTEX(gbuf_resize_mutex);
	struct page **tmp_pages = NULL;
	struct z_erofs_gbuf *gbuf;
	void *ptr, *old_ptr;
	int last, i, j;

	mutex_lock(&gbuf_resize_mutex);
	/*
	 * Avoid shrinking gbufs, since we have no idea how many fses
	 * still rely on the current size.
	 */
	if (nrpages <= z_erofs_gbuf_nrpages) {
		mutex_unlock(&gbuf_resize_mutex);
		return 0;
	}

	for (i = 0; i < z_erofs_gbuf_count; ++i) {
		gbuf = &z_erofs_gbufpool[i];
		tmp_pages = kcalloc(nrpages, sizeof(*tmp_pages), GFP_KERNEL);
		if (!tmp_pages)
			goto out;

		for (j = 0; j < gbuf->nrpages; ++j)
			tmp_pages[j] = gbuf->pages[j];
		do {
			last = j;
			j = alloc_pages_bulk_array(GFP_KERNEL, nrpages,
						   tmp_pages);
			if (last == j)
				goto out;
		} while (j != nrpages);

		ptr = vmap(tmp_pages, nrpages, VM_MAP, PAGE_KERNEL);
		if (!ptr)
			goto out;

		spin_lock(&gbuf->lock);
		kfree(gbuf->pages);
		gbuf->pages = tmp_pages;
		old_ptr = gbuf->ptr;
		gbuf->ptr = ptr;
		gbuf->nrpages = nrpages;
		spin_unlock(&gbuf->lock);
		if (old_ptr)
			vunmap(old_ptr);
	}
	z_erofs_gbuf_nrpages = nrpages;
out:
	if (i < z_erofs_gbuf_count && tmp_pages) {
		for (j = 0; j < nrpages; ++j)
			if (tmp_pages[j] && tmp_pages[j] != gbuf->pages[j])
				__free_page(tmp_pages[j]);
		kfree(tmp_pages);
	}
	mutex_unlock(&gbuf_resize_mutex);
	return i < z_erofs_gbuf_count ? -ENOMEM : 0;
}

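/*
 * Illustrative usage sketch (not part of this file; the in-tree callers are
 * the EROFS decompressors): a caller needing a virtually contiguous bounce
 * buffer of @nr pages would typically grow the pool once and then borrow a
 * per-CPU buffer around the actual work.  'src' below is purely hypothetical;
 * the point is only the growsize -> get -> put pairing:
 *
 *	if (z_erofs_gbuf_growsize(nr))		// returns -ENOMEM on failure
 *		return -ENOMEM;
 *	buf = z_erofs_get_gbuf(nr);		// disables migration + locks
 *	if (buf) {
 *		memcpy(buf, src, nr * PAGE_SIZE);	// placeholder work
 *		z_erofs_put_gbuf(buf);		// unlocks, re-enables migration
 *	}
 *
 * z_erofs_get_gbuf() returns NULL if the pool is still smaller than @nr
 * pages, so callers must handle that case.
 */
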
int __init z_erofs_gbuf_init(void)
{
	unsigned int i, total = num_possible_cpus();

	if (z_erofs_gbuf_count)
		total = min(z_erofs_gbuf_count, total);
	z_erofs_gbuf_count = total;

	/* The last (special) global buffer is the reserved buffer */
	total += !!z_erofs_rsv_nrpages;

	z_erofs_gbufpool = kcalloc(total, sizeof(*z_erofs_gbufpool),
				   GFP_KERNEL);
	if (!z_erofs_gbufpool)
		return -ENOMEM;

	if (z_erofs_rsv_nrpages) {
		z_erofs_rsvbuf = &z_erofs_gbufpool[total - 1];
		z_erofs_rsvbuf->pages = kcalloc(z_erofs_rsv_nrpages,
				sizeof(*z_erofs_rsvbuf->pages), GFP_KERNEL);
		if (!z_erofs_rsvbuf->pages) {
			z_erofs_rsvbuf = NULL;
			z_erofs_rsv_nrpages = 0;
		}
	}
	for (i = 0; i < total; ++i)
		spin_lock_init(&z_erofs_gbufpool[i].lock);
	return 0;
}

void z_erofs_gbuf_exit(void)
{
	int i, j;

	for (i = 0; i < z_erofs_gbuf_count + (!!z_erofs_rsvbuf); ++i) {
		struct z_erofs_gbuf *gbuf = &z_erofs_gbufpool[i];

		if (gbuf->ptr) {
			vunmap(gbuf->ptr);
			gbuf->ptr = NULL;
		}

		if (!gbuf->pages)
			continue;

		for (j = 0; j < gbuf->nrpages; ++j)
			if (gbuf->pages[j])
				put_page(gbuf->pages[j]);
		kfree(gbuf->pages);
		gbuf->pages = NULL;
	}
	kfree(z_erofs_gbufpool);
}

struct page *__erofs_allocpage(struct page **pagepool, gfp_t gfp, bool tryrsv)
{
	struct page *page = *pagepool;

	if (page) {
		*pagepool = (struct page *)page_private(page);
	} else if (tryrsv && z_erofs_rsvbuf && z_erofs_rsvbuf->nrpages) {
		spin_lock(&z_erofs_rsvbuf->lock);
		if (z_erofs_rsvbuf->nrpages)
			page = z_erofs_rsvbuf->pages[--z_erofs_rsvbuf->nrpages];
		spin_unlock(&z_erofs_rsvbuf->lock);
	}
	if (!page)
		page = alloc_page(gfp);
	DBG_BUGON(page && page_ref_count(page) != 1);
	return page;
}

void erofs_release_pages(struct page **pagepool)
{
	while (*pagepool) {
		struct page *page = *pagepool;

		*pagepool = (struct page *)page_private(page);
		/* try to fill reserved global pool first */
		if (z_erofs_rsvbuf && z_erofs_rsvbuf->nrpages <
				z_erofs_rsv_nrpages) {
			spin_lock(&z_erofs_rsvbuf->lock);
			if (z_erofs_rsvbuf->nrpages < z_erofs_rsv_nrpages) {
				z_erofs_rsvbuf->pages[z_erofs_rsvbuf->nrpages++]
					= page;
				spin_unlock(&z_erofs_rsvbuf->lock);
				continue;
			}
			spin_unlock(&z_erofs_rsvbuf->lock);
		}
		put_page(page);
	}
}

static bool erofs_workgroup_get(struct erofs_workgroup *grp)
{
	if (lockref_get_not_zero(&grp->lockref))
		return true;

	spin_lock(&grp->lockref.lock);
	if (__lockref_is_dead(&grp->lockref)) {
		spin_unlock(&grp->lockref.lock);
		return false;
	}

	if (!grp->lockref.count++)
		atomic_long_dec(&erofs_global_shrink_cnt);
	spin_unlock(&grp->lockref.lock);
	return true;
}

struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
					     pgoff_t index)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_workgroup *grp;

repeat:
	rcu_read_lock();
	grp = xa_load(&sbi->managed_pslots, index);
	if (grp) {
		if (!erofs_workgroup_get(grp)) {
			/* prefer to relax rcu read side */
			rcu_read_unlock();
			goto repeat;
		}

		DBG_BUGON(index != grp->index);
	}
	rcu_read_unlock();
	return grp;
}

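/*
 * Illustrative lookup sketch (hypothetical caller): users of the lookup above
 * pair it with erofs_workgroup_put() once they are done with the object:
 *
 *	grp = erofs_find_workgroup(sb, index);	// takes a lockref reference
 *	if (grp) {
 *		// ... work on the live workgroup ...
 *		erofs_workgroup_put(grp);	// may make it shrinkable again
 *	}
 *
 * If the in-tree workgroup is concurrently dying, the lookup retries; a NULL
 * return means no live workgroup exists for that index, so callers must
 * always handle NULL.
 */
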
struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
					       struct erofs_workgroup *grp)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);
	struct erofs_workgroup *pre;

	DBG_BUGON(grp->lockref.count < 1);
repeat:
	xa_lock(&sbi->managed_pslots);
	pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
			   NULL, grp, GFP_KERNEL);
	if (pre) {
		if (xa_is_err(pre)) {
			pre = ERR_PTR(xa_err(pre));
		} else if (!erofs_workgroup_get(pre)) {
			/* try to legitimize the current in-tree one */
			xa_unlock(&sbi->managed_pslots);
			cond_resched();
			goto repeat;
		}
		grp = pre;
	}
	xa_unlock(&sbi->managed_pslots);
	return grp;
}

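/*
 * Illustrative insertion sketch (hypothetical caller, loosely modelled on the
 * z_erofs pcluster registration path): the caller must already hold a
 * reference on the new workgroup and has to cope both with losing the race
 * to a concurrent inserter and with XArray allocation errors:
 *
 *	grp->lockref.count = 1;			// initial reference
 *	pre = erofs_insert_workgroup(sb, grp);
 *	if (IS_ERR(pre))
 *		return PTR_ERR(pre);		// __xa_cmpxchg() failed
 *	if (pre != grp) {
 *		free_new_workgroup(grp);	// hypothetical helper
 *		grp = pre;			// use the in-tree one instead
 *	}
 *
 * Only one workgroup per index can ever live in 'managed_pslots'.
 */
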
static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
	atomic_long_dec(&erofs_global_shrink_cnt);
	erofs_workgroup_free_rcu(grp);
}

void erofs_workgroup_put(struct erofs_workgroup *grp)
{
	if (lockref_put_or_lock(&grp->lockref))
		return;

	DBG_BUGON(__lockref_is_dead(&grp->lockref));
	if (grp->lockref.count == 1)
		atomic_long_inc(&erofs_global_shrink_cnt);
	--grp->lockref.count;
	spin_unlock(&grp->lockref.lock);
}

static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp)
{
	int free = false;

	spin_lock(&grp->lockref.lock);
	if (grp->lockref.count)
		goto out;

	/*
	 * Note that all cached pages should be detached before being deleted
	 * from the XArray.  Otherwise some cached pages could still be
	 * attached to the orphan old workgroup when the new one is available
	 * in the tree.
	 */
	if (erofs_try_to_free_all_cached_folios(sbi, grp))
		goto out;

	/*
	 * It's impossible to fail after the workgroup is frozen, however in
	 * order to avoid some race conditions, add a DBG_BUGON to observe
	 * this in advance.
	 */
	DBG_BUGON(__xa_erase(&sbi->managed_pslots, grp->index) != grp);

	lockref_mark_dead(&grp->lockref);
	free = true;
out:
	spin_unlock(&grp->lockref.lock);
	if (free)
		__erofs_workgroup_free(grp);
	return free;
}

static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
					      unsigned long nr_shrink)
{
	struct erofs_workgroup *grp;
	unsigned int freed = 0;
	unsigned long index;

	xa_lock(&sbi->managed_pslots);
	xa_for_each(&sbi->managed_pslots, index, grp) {
		/* try to shrink each valid workgroup */
		if (!erofs_try_to_release_workgroup(sbi, grp))
			continue;
		xa_unlock(&sbi->managed_pslots);

		++freed;
		if (!--nr_shrink)
			return freed;
		xa_lock(&sbi->managed_pslots);
	}
	xa_unlock(&sbi->managed_pslots);
	return freed;
}

void erofs_shrinker_register(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_init(&sbi->umount_mutex);

	spin_lock(&erofs_sb_list_lock);
	list_add(&sbi->list, &erofs_sb_list);
	spin_unlock(&erofs_sb_list_lock);
}

void erofs_shrinker_unregister(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	mutex_lock(&sbi->umount_mutex);
	/* clean up all remaining workgroups in memory */
	erofs_shrink_workstation(sbi, ~0UL);

	spin_lock(&erofs_sb_list_lock);
	list_del(&sbi->list);
	spin_unlock(&erofs_sb_list_lock);
	mutex_unlock(&sbi->umount_mutex);
}

static unsigned long erofs_shrink_count(struct shrinker *shrink,
					struct shrink_control *sc)
{
	return atomic_long_read(&erofs_global_shrink_cnt);
}

static unsigned long erofs_shrink_scan(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct erofs_sb_info *sbi;
	struct list_head *p;

	unsigned long nr = sc->nr_to_scan;
	unsigned int run_no;
	unsigned long freed = 0;

	spin_lock(&erofs_sb_list_lock);
	do {
		run_no = ++shrinker_run_no;
	} while (run_no == 0);

	/* Iterate over all mounted superblocks and try to shrink them */
	p = erofs_sb_list.next;
	while (p != &erofs_sb_list) {
		sbi = list_entry(p, struct erofs_sb_info, list);

		/*
		 * We move the ones we do to the end of the list, so we stop
		 * when we see one we have already done.
		 */
		if (sbi->shrinker_run_no == run_no)
			break;

		if (!mutex_trylock(&sbi->umount_mutex)) {
			p = p->next;
			continue;
		}

		spin_unlock(&erofs_sb_list_lock);
		sbi->shrinker_run_no = run_no;

		freed += erofs_shrink_workstation(sbi, nr - freed);

		spin_lock(&erofs_sb_list_lock);
		/* Get the next list element before we move this one */
		p = p->next;

		/*
		 * Move this one to the end of the list to provide some
		 * fairness.
		 */
		list_move_tail(&sbi->list, &erofs_sb_list);
		mutex_unlock(&sbi->umount_mutex);

		if (freed >= nr)
			break;
	}
	spin_unlock(&erofs_sb_list_lock);
	return freed;
}

int __init erofs_init_shrinker(void)
{
	erofs_shrinker_info = shrinker_alloc(0, "erofs-shrinker");
	if (!erofs_shrinker_info)
		return -ENOMEM;

	erofs_shrinker_info->count_objects = erofs_shrink_count;
	erofs_shrinker_info->scan_objects = erofs_shrink_scan;
	shrinker_register(erofs_shrinker_info);
	return 0;
}

void erofs_exit_shrinker(void)
{
	shrinker_free(erofs_shrinker_info);
}
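
/*
 * Illustrative lifecycle sketch (assumed wiring; see the rest of EROFS for
 * the exact call sites): the shrinker hooks above are expected to be used
 * roughly as follows:
 *
 *	erofs_init_shrinker();			// module init
 *	erofs_shrinker_register(sb);		// at mount time
 *	...					// memory pressure may invoke
 *						// erofs_shrink_scan() here
 *	erofs_shrinker_unregister(sb);		// at unmount time
 *	erofs_exit_shrinker();			// module exit
 *
 * erofs_shrink_count() advertises 'erofs_global_shrink_cnt', i.e. the number
 * of workgroups that currently have no active users and may be reclaimed.
 */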