// SPDX-License-Identifier: GPL-2.0
#include <linux/memcontrol.h>
#include <linux/rwsem.h>
#include <linux/shrinker.h>
#include <linux/rculist.h>
#include <trace/events/vmscan.h>

#include "internal.h"

LIST_HEAD(shrinker_list);
DEFINE_MUTEX(shrinker_mutex);

#ifdef CONFIG_MEMCG
static int shrinker_nr_max;

static inline int shrinker_unit_size(int nr_items)
{
	return (DIV_ROUND_UP(nr_items, SHRINKER_UNIT_BITS) * sizeof(struct shrinker_info_unit *));
}

static inline void shrinker_unit_free(struct shrinker_info *info, int start)
{
	struct shrinker_info_unit **unit;
	int nr, i;

	if (!info)
		return;

	unit = info->unit;
	nr = DIV_ROUND_UP(info->map_nr_max, SHRINKER_UNIT_BITS);

	for (i = start; i < nr; i++) {
		if (!unit[i])
			break;

		kfree(unit[i]);
		unit[i] = NULL;
	}
}

static inline int shrinker_unit_alloc(struct shrinker_info *new,
				      struct shrinker_info *old, int nid)
{
	struct shrinker_info_unit *unit;
	int nr = DIV_ROUND_UP(new->map_nr_max, SHRINKER_UNIT_BITS);
	int start = old ? DIV_ROUND_UP(old->map_nr_max, SHRINKER_UNIT_BITS) : 0;
	int i;

	for (i = start; i < nr; i++) {
		unit = kzalloc_node(sizeof(*unit), GFP_KERNEL, nid);
		if (!unit) {
			shrinker_unit_free(new, start);
			return -ENOMEM;
		}

		new->unit[i] = unit;
	}

	return 0;
}

void free_shrinker_info(struct mem_cgroup *memcg)
{
	struct mem_cgroup_per_node *pn;
	struct shrinker_info *info;
	int nid;

	for_each_node(nid) {
		pn = memcg->nodeinfo[nid];
		info = rcu_dereference_protected(pn->shrinker_info, true);
		shrinker_unit_free(info, 0);
		kvfree(info);
		rcu_assign_pointer(pn->shrinker_info, NULL);
	}
}

int alloc_shrinker_info(struct mem_cgroup *memcg)
{
	int nid, ret = 0;
	int array_size = 0;

	mutex_lock(&shrinker_mutex);
	array_size = shrinker_unit_size(shrinker_nr_max);
	for_each_node(nid) {
		struct shrinker_info *info = kvzalloc_node(sizeof(*info) + array_size,
							   GFP_KERNEL, nid);
		if (!info)
			goto err;
		info->map_nr_max = shrinker_nr_max;
		if (shrinker_unit_alloc(info, NULL, nid)) {
			kvfree(info);
			goto err;
		}
		rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info);
	}
	mutex_unlock(&shrinker_mutex);

	return ret;

err:
	mutex_unlock(&shrinker_mutex);
	free_shrinker_info(memcg);
	return -ENOMEM;
}

static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg,
						     int nid)
{
	return rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_info,
					 lockdep_is_held(&shrinker_mutex));
}

static int expand_one_shrinker_info(struct mem_cgroup *memcg, int new_size,
				    int old_size, int new_nr_max)
{
	struct shrinker_info *new, *old;
	struct mem_cgroup_per_node *pn;
	int nid;

	for_each_node(nid) {
		pn = memcg->nodeinfo[nid];
		old = shrinker_info_protected(memcg, nid);
		/* Not yet online memcg */
		if (!old)
			return 0;

		/* Already expanded this shrinker_info */
		if (new_nr_max <= old->map_nr_max)
			continue;

		new = kvzalloc_node(sizeof(*new) + new_size, GFP_KERNEL, nid);
		if (!new)
			return -ENOMEM;

		new->map_nr_max = new_nr_max;

		memcpy(new->unit, old->unit, old_size);
		if (shrinker_unit_alloc(new, old, nid)) {
			kvfree(new);
			return -ENOMEM;
		}

		rcu_assign_pointer(pn->shrinker_info, new);
		kvfree_rcu(old, rcu);
	}

	return 0;
}
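/*
 * Layout sketch for the shrinker_info handled above (illustrative only;
 * SHRINKER_UNIT_BITS comes from <linux/shrinker.h>, 64 is assumed here
 * for concreteness):
 *
 *	shrinker_info (per memcg, per node)
 *	  ->map_nr_max = 128
 *	  ->unit[0] -> { map: bits for ids  0..63,   nr_deferred[0..63] }
 *	  ->unit[1] -> { map: bits for ids  64..127, nr_deferred[0..63] }
 *
 * Expansion only appends new units: expand_one_shrinker_info() copies
 * the old unit pointers verbatim, so a concurrent reader that already
 * holds a unit pointer keeps seeing valid memory.
 */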
static int expand_shrinker_info(int new_id)
{
	int ret = 0;
	int new_nr_max = round_up(new_id + 1, SHRINKER_UNIT_BITS);
	int new_size, old_size = 0;
	struct mem_cgroup *memcg;

	if (!root_mem_cgroup)
		goto out;

	lockdep_assert_held(&shrinker_mutex);

	new_size = shrinker_unit_size(new_nr_max);
	old_size = shrinker_unit_size(shrinker_nr_max);

	memcg = mem_cgroup_iter(NULL, NULL, NULL);
	do {
		ret = expand_one_shrinker_info(memcg, new_size, old_size,
					       new_nr_max);
		if (ret) {
			mem_cgroup_iter_break(NULL, memcg);
			goto out;
		}
	} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
out:
	if (!ret)
		shrinker_nr_max = new_nr_max;

	return ret;
}

static inline int shrinker_id_to_index(int shrinker_id)
{
	return shrinker_id / SHRINKER_UNIT_BITS;
}

static inline int shrinker_id_to_offset(int shrinker_id)
{
	return shrinker_id % SHRINKER_UNIT_BITS;
}

static inline int calc_shrinker_id(int index, int offset)
{
	return index * SHRINKER_UNIT_BITS + offset;
}
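/*
 * Worked example for the helpers above, again assuming
 * SHRINKER_UNIT_BITS == 64: shrinker id 70 lives in unit[1] (70 / 64)
 * at bit offset 6 (70 % 64), and calc_shrinker_id(1, 6) recovers 70.
 */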
void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
{
	if (shrinker_id >= 0 && memcg && !mem_cgroup_is_root(memcg)) {
		struct shrinker_info *info;
		struct shrinker_info_unit *unit;

		rcu_read_lock();
		info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
		unit = info->unit[shrinker_id_to_index(shrinker_id)];
		if (!WARN_ON_ONCE(shrinker_id >= info->map_nr_max)) {
			/* Pairs with smp mb in shrink_slab() */
			smp_mb__before_atomic();
			set_bit(shrinker_id_to_offset(shrinker_id), unit->map);
		}
		rcu_read_unlock();
	}
}

static DEFINE_IDR(shrinker_idr);

static int shrinker_memcg_alloc(struct shrinker *shrinker)
{
	int id, ret = -ENOMEM;

	if (mem_cgroup_disabled())
		return -ENOSYS;
	if (mem_cgroup_kmem_disabled() && !(shrinker->flags & SHRINKER_NONSLAB))
		return -ENOSYS;

	mutex_lock(&shrinker_mutex);
	id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL);
	if (id < 0)
		goto unlock;

	if (id >= shrinker_nr_max) {
		if (expand_shrinker_info(id)) {
			idr_remove(&shrinker_idr, id);
			goto unlock;
		}
	}
	shrinker->id = id;
	ret = 0;
unlock:
	mutex_unlock(&shrinker_mutex);
	return ret;
}

static void shrinker_memcg_remove(struct shrinker *shrinker)
{
	int id = shrinker->id;

	BUG_ON(id < 0);

	lockdep_assert_held(&shrinker_mutex);

	idr_remove(&shrinker_idr, id);
}

static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
				   struct mem_cgroup *memcg)
{
	struct shrinker_info *info;
	struct shrinker_info_unit *unit;
	long nr_deferred;

	rcu_read_lock();
	info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
	unit = info->unit[shrinker_id_to_index(shrinker->id)];
	nr_deferred = atomic_long_xchg(&unit->nr_deferred[shrinker_id_to_offset(shrinker->id)], 0);
	rcu_read_unlock();

	return nr_deferred;
}

static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
				  struct mem_cgroup *memcg)
{
	struct shrinker_info *info;
	struct shrinker_info_unit *unit;
	long nr_deferred;

	rcu_read_lock();
	info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
	unit = info->unit[shrinker_id_to_index(shrinker->id)];
	nr_deferred =
		atomic_long_add_return(nr, &unit->nr_deferred[shrinker_id_to_offset(shrinker->id)]);
	rcu_read_unlock();

	return nr_deferred;
}

void reparent_shrinker_deferred(struct mem_cgroup *memcg)
{
	int nid, index, offset;
	long nr;
	struct mem_cgroup *parent;
	struct shrinker_info *child_info, *parent_info;
	struct shrinker_info_unit *child_unit, *parent_unit;

	parent = parent_mem_cgroup(memcg);
	if (!parent)
		parent = root_mem_cgroup;

	/* Prevent concurrent shrinker_info expansion */
	mutex_lock(&shrinker_mutex);
	for_each_node(nid) {
		child_info = shrinker_info_protected(memcg, nid);
		parent_info = shrinker_info_protected(parent, nid);
		for (index = 0; index < shrinker_id_to_index(child_info->map_nr_max); index++) {
			child_unit = child_info->unit[index];
			parent_unit = parent_info->unit[index];
			for (offset = 0; offset < SHRINKER_UNIT_BITS; offset++) {
				nr = atomic_long_read(&child_unit->nr_deferred[offset]);
				atomic_long_add(nr, &parent_unit->nr_deferred[offset]);
			}
		}
	}
	mutex_unlock(&shrinker_mutex);
}
#else
static int shrinker_memcg_alloc(struct shrinker *shrinker)
{
	return -ENOSYS;
}

static void shrinker_memcg_remove(struct shrinker *shrinker)
{
}

static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
				   struct mem_cgroup *memcg)
{
	return 0;
}

static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
				  struct mem_cgroup *memcg)
{
	return 0;
}
#endif /* CONFIG_MEMCG */

static long xchg_nr_deferred(struct shrinker *shrinker,
			     struct shrink_control *sc)
{
	int nid = sc->nid;

	if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
		nid = 0;

	if (sc->memcg &&
	    (shrinker->flags & SHRINKER_MEMCG_AWARE))
		return xchg_nr_deferred_memcg(nid, shrinker,
					      sc->memcg);

	return atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
}

static long add_nr_deferred(long nr, struct shrinker *shrinker,
			    struct shrink_control *sc)
{
	int nid = sc->nid;

	if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
		nid = 0;

	if (sc->memcg &&
	    (shrinker->flags & SHRINKER_MEMCG_AWARE))
		return add_nr_deferred_memcg(nr, nid, shrinker,
					     sc->memcg);

	return atomic_long_add_return(nr, &shrinker->nr_deferred[nid]);
}

#define SHRINK_BATCH 128
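/*
 * Worked example for the scan target computed in do_shrink_slab() below
 * (numbers are illustrative): with freeable = 10000, priority = 4 and
 * seeks = DEFAULT_SEEKS (2), delta = (10000 >> 4) * 4 / 2 = 1250.  The
 * previously deferred count, scaled as nr >> priority, is added on top,
 * and the sum is clamped to 2 * freeable.
 */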
static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
				    struct shrinker *shrinker, int priority)
{
	unsigned long freed = 0;
	unsigned long long delta;
	long total_scan;
	long freeable;
	long nr;
	long new_nr;
	long batch_size = shrinker->batch ? shrinker->batch
					  : SHRINK_BATCH;
	long scanned = 0, next_deferred;

	freeable = shrinker->count_objects(shrinker, shrinkctl);
	if (freeable == 0 || freeable == SHRINK_EMPTY)
		return freeable;

	/*
	 * copy the current shrinker scan count into a local variable
	 * and zero it so that other concurrent shrinker invocations
	 * don't also do this scanning work.
	 */
	nr = xchg_nr_deferred(shrinker, shrinkctl);

	if (shrinker->seeks) {
		delta = freeable >> priority;
		delta *= 4;
		do_div(delta, shrinker->seeks);
	} else {
		/*
		 * These objects don't require any IO to create. Trim
		 * them aggressively under memory pressure to keep
		 * them from causing refetches in the IO caches.
		 */
		delta = freeable / 2;
	}

	total_scan = nr >> priority;
	total_scan += delta;
	total_scan = min(total_scan, (2 * freeable));

	trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
				   freeable, delta, total_scan, priority,
				   shrinkctl->memcg);

	/*
	 * Normally, we should not scan less than batch_size objects in one
	 * pass to avoid too frequent shrinker calls, but if the slab has less
	 * than batch_size objects in total and we are really tight on memory,
	 * we will try to reclaim all available objects, otherwise we can end
	 * up failing allocations although there are plenty of reclaimable
	 * objects spread over several slabs with usage less than the
	 * batch_size.
	 *
	 * We detect the "tight on memory" situations by looking at the total
	 * number of objects we want to scan (total_scan). If it is greater
	 * than the total number of objects on slab (freeable), we must be
	 * scanning at high prio and therefore should try to reclaim as much as
	 * possible.
	 */
	while (total_scan >= batch_size ||
	       total_scan >= freeable) {
		unsigned long ret;
		unsigned long nr_to_scan = min(batch_size, total_scan);

		shrinkctl->nr_to_scan = nr_to_scan;
		shrinkctl->nr_scanned = nr_to_scan;
		ret = shrinker->scan_objects(shrinker, shrinkctl);
		if (ret == SHRINK_STOP)
			break;
		freed += ret;

		count_vm_events(SLABS_SCANNED, shrinkctl->nr_scanned);
		total_scan -= shrinkctl->nr_scanned;
		scanned += shrinkctl->nr_scanned;

		cond_resched();
	}

	/*
	 * The deferred work is increased by any new work (delta) that wasn't
	 * done, decreased by old deferred work that was done now.
	 *
	 * And it is capped at two times the number of freeable items.
	 */
	next_deferred = max_t(long, (nr + delta - scanned), 0);
	next_deferred = min(next_deferred, (2 * freeable));

	/*
	 * move the unused scan count back into the shrinker in a
	 * manner that handles concurrent updates.
	 */
	new_nr = add_nr_deferred(next_deferred, shrinker, shrinkctl);

	trace_mm_shrink_slab_end(shrinker, shrinkctl->nid, freed, nr, new_nr, total_scan,
				 shrinkctl->memcg);
	return freed;
}
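/*
 * Deferred-work bookkeeping sketch for the function above (numbers are
 * illustrative): if a pass starts with nr = 500 previously deferred
 * objects, computes delta = 1250, and ->scan_objects() manages to scan
 * 1000 before returning SHRINK_STOP, then next_deferred becomes
 * 500 + 1250 - 1000 = 750 and is handed back via add_nr_deferred() for
 * a future pass, still capped at 2 * freeable.
 */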
#ifdef CONFIG_MEMCG
static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
				       struct mem_cgroup *memcg, int priority)
{
	struct shrinker_info *info;
	unsigned long ret, freed = 0;
	int offset, index = 0;

	if (!mem_cgroup_online(memcg))
		return 0;

	/*
	 * Lockless algorithm of memcg shrink.
	 *
	 * The shrinker_info may be freed asynchronously via RCU in
	 * expand_one_shrinker_info(), so rcu_read_lock() needs to be used
	 * to ensure the existence of the shrinker_info.
	 *
	 * The shrinker_info_unit is never freed unless its corresponding memcg
	 * is destroyed. Here we already hold a reference to the memcg, so the
	 * memcg will not be destroyed, and of course the shrinker_info_unit
	 * will not be freed.
	 *
	 * So in the memcg shrink:
	 * step 1: use rcu_read_lock() to guarantee existence of the
	 *         shrinker_info.
	 * step 2: after getting shrinker_info_unit we can safely release the
	 *         RCU lock.
	 * step 3: traverse the bitmap and calculate shrinker_id
	 * step 4: use rcu_read_lock() to guarantee existence of the shrinker.
	 * step 5: use shrinker_id to find the shrinker, then use
	 *         shrinker_try_get() to guarantee existence of the shrinker,
	 *         then we can release the RCU lock to do do_shrink_slab() that
	 *         may sleep.
	 * step 6: do shrinker_put() paired with step 5 to put the refcount,
	 *         if the refcount reaches 0, then wake up the waiter in
	 *         shrinker_free() by calling complete().
	 *         Note: here is different from the global shrink, we don't
	 *         need to acquire the RCU lock to guarantee existence of
	 *         the shrinker, because we don't need to use this
	 *         shrinker to traverse the next shrinker in the bitmap.
	 * step 7: we have already exited the RCU read-side critical section
	 *         before calling do_shrink_slab(), the shrinker_info may be
	 *         released in expand_one_shrinker_info(), so go back to step 1
	 *         to reacquire the shrinker_info.
	 */
again:
	rcu_read_lock();
	info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
	if (unlikely(!info))
		goto unlock;

	if (index < shrinker_id_to_index(info->map_nr_max)) {
		struct shrinker_info_unit *unit;

		unit = info->unit[index];

		rcu_read_unlock();

		for_each_set_bit(offset, unit->map, SHRINKER_UNIT_BITS) {
			struct shrink_control sc = {
				.gfp_mask = gfp_mask,
				.nid = nid,
				.memcg = memcg,
			};
			struct shrinker *shrinker;
			int shrinker_id = calc_shrinker_id(index, offset);

			rcu_read_lock();
			shrinker = idr_find(&shrinker_idr, shrinker_id);
			if (unlikely(!shrinker || !shrinker_try_get(shrinker))) {
				clear_bit(offset, unit->map);
				rcu_read_unlock();
				continue;
			}
			rcu_read_unlock();

			/* Call non-slab shrinkers even though kmem is disabled */
			if (!memcg_kmem_online() &&
			    !(shrinker->flags & SHRINKER_NONSLAB)) {
				clear_bit(offset, unit->map);
				shrinker_put(shrinker);
				continue;
			}

			ret = do_shrink_slab(&sc, shrinker, priority);
			if (ret == SHRINK_EMPTY) {
				clear_bit(offset, unit->map);
				/*
				 * After the shrinker reported that it had no objects to
				 * free, but before we cleared the corresponding bit in
				 * the memcg shrinker map, a new object might have been
				 * added. To make sure we have the bit set in this
				 * case, we invoke the shrinker one more time and reset
				 * the bit if it reports that it is not empty anymore.
				 * The memory barrier here pairs with the barrier in
				 * set_shrinker_bit():
				 *
				 * list_lru_add()	shrink_slab_memcg()
				 *   list_add_tail()	  clear_bit()
				 *   <MB>		  <MB>
				 *   set_bit()		  do_shrink_slab()
				 */
				smp_mb__after_atomic();
				ret = do_shrink_slab(&sc, shrinker, priority);
				if (ret == SHRINK_EMPTY)
					ret = 0;
				else
					set_shrinker_bit(memcg, nid, shrinker_id);
			}
			freed += ret;
			shrinker_put(shrinker);
		}

		index++;
		goto again;
	}
unlock:
	rcu_read_unlock();
	return freed;
}
#else /* !CONFIG_MEMCG */
static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
				       struct mem_cgroup *memcg, int priority)
{
	return 0;
}
#endif /* CONFIG_MEMCG */
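/*
 * Note on the bitmap protocol used above: a set bit only means "this
 * shrinker may have objects on this memcg/node", so false positives are
 * cheap and resolved by the SHRINK_EMPTY re-check, while false negatives
 * are prevented by the barrier pairing between set_shrinker_bit() and
 * shrink_slab_memcg().
 */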
/**
 * shrink_slab - shrink slab caches
 * @gfp_mask: allocation context
 * @nid: node whose slab caches to target
 * @memcg: memory cgroup whose slab caches to target
 * @priority: the reclaim priority
 *
 * Call the shrink functions to age shrinkable caches.
 *
 * @nid is passed along to shrinkers with SHRINKER_NUMA_AWARE set;
 * unaware shrinkers will receive a node id of 0 instead.
 *
 * @memcg specifies the memory cgroup to target. Unaware shrinkers
 * are called only if it is the root cgroup.
 *
 * @priority is sc->priority: the number of freeable objects is shifted
 * right by @priority to derive the scan target.
 *
 * Returns the number of reclaimed slab objects.
 */
unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
			  int priority)
{
	unsigned long ret, freed = 0;
	struct shrinker *shrinker;

	/*
	 * The root memcg might be allocated even though memcg is disabled
	 * via the "cgroup_disable=memory" boot parameter. This could make
	 * mem_cgroup_is_root() return false, in which case only the memcg
	 * slab shrink would run while the global shrink is skipped, which
	 * may result in premature OOM.
	 */
	if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg))
		return shrink_slab_memcg(gfp_mask, nid, memcg, priority);

	/*
	 * Lockless algorithm of global shrink.
	 *
	 * In the unregistration step, the shrinker will be freed asynchronously
	 * via RCU after its refcount reaches 0. So both rcu_read_lock() and
	 * shrinker_try_get() can be used to ensure the existence of the shrinker.
	 *
	 * So in the global shrink:
	 * step 1: use rcu_read_lock() to guarantee existence of the shrinker
	 *         and the validity of the shrinker_list walk.
	 * step 2: use shrinker_try_get() to try get the refcount, if successful,
	 *         then the existence of the shrinker can also be guaranteed,
	 *         so we can release the RCU lock to do do_shrink_slab() that
	 *         may sleep.
	 * step 3: *MUST* reacquire the RCU lock before calling shrinker_put(),
	 *         which ensures that neither this shrinker nor the next shrinker
	 *         will be freed in the next traversal operation.
	 * step 4: do shrinker_put() paired with step 2 to put the refcount,
	 *         if the refcount reaches 0, then wake up the waiter in
	 *         shrinker_free() by calling complete().
	 */
	rcu_read_lock();
	list_for_each_entry_rcu(shrinker, &shrinker_list, list) {
		struct shrink_control sc = {
			.gfp_mask = gfp_mask,
			.nid = nid,
			.memcg = memcg,
		};

		if (!shrinker_try_get(shrinker))
			continue;

		rcu_read_unlock();

		ret = do_shrink_slab(&sc, shrinker, priority);
		if (ret == SHRINK_EMPTY)
			ret = 0;
		freed += ret;

		rcu_read_lock();
		shrinker_put(shrinker);
	}

	rcu_read_unlock();
	cond_resched();
	return freed;
}
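/*
 * Typical lifecycle of the API below (an illustrative sketch; the "foo"
 * names are hypothetical and not part of this file):
 *
 *	struct shrinker *s = shrinker_alloc(SHRINKER_NUMA_AWARE, "foo");
 *
 *	if (!s)
 *		return -ENOMEM;
 *	s->count_objects = foo_count_objects;
 *	s->scan_objects = foo_scan_objects;
 *	s->private_data = foo;
 *	shrinker_register(s);
 *
 * and, on teardown of the owning object:
 *
 *	shrinker_free(s);
 */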
struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...)
{
	struct shrinker *shrinker;
	unsigned int size;
	va_list ap;
	int err;

	shrinker = kzalloc(sizeof(struct shrinker), GFP_KERNEL);
	if (!shrinker)
		return NULL;

	va_start(ap, fmt);
	err = shrinker_debugfs_name_alloc(shrinker, fmt, ap);
	va_end(ap);
	if (err)
		goto err_name;

	shrinker->flags = flags | SHRINKER_ALLOCATED;
	shrinker->seeks = DEFAULT_SEEKS;

	if (flags & SHRINKER_MEMCG_AWARE) {
		err = shrinker_memcg_alloc(shrinker);
		if (err == -ENOSYS) {
			/* Memcg is not supported, fall back to non-memcg-aware shrinker. */
			shrinker->flags &= ~SHRINKER_MEMCG_AWARE;
			goto non_memcg;
		}

		if (err)
			goto err_flags;

		return shrinker;
	}

non_memcg:
	/*
	 * The nr_deferred is available on per memcg level for memcg aware
	 * shrinkers, so only allocate nr_deferred in the following cases:
	 *  - non-memcg-aware shrinkers
	 *  - !CONFIG_MEMCG
	 *  - memcg is disabled by kernel command line
	 *  - non-slab shrinkers: when memcg kmem is disabled
	 */
	size = sizeof(*shrinker->nr_deferred);
	if (flags & SHRINKER_NUMA_AWARE)
		size *= nr_node_ids;

	shrinker->nr_deferred = kzalloc(size, GFP_KERNEL);
	if (!shrinker->nr_deferred)
		goto err_flags;

	return shrinker;

err_flags:
	shrinker_debugfs_name_free(shrinker);
err_name:
	kfree(shrinker);
	return NULL;
}
EXPORT_SYMBOL_GPL(shrinker_alloc);

void shrinker_register(struct shrinker *shrinker)
{
	if (unlikely(!(shrinker->flags & SHRINKER_ALLOCATED))) {
		pr_warn("Must use shrinker_alloc() to dynamically allocate the shrinker\n");
		return;
	}

	mutex_lock(&shrinker_mutex);
	list_add_tail_rcu(&shrinker->list, &shrinker_list);
	shrinker->flags |= SHRINKER_REGISTERED;
	shrinker_debugfs_add(shrinker);
	mutex_unlock(&shrinker_mutex);

	init_completion(&shrinker->done);
	/*
	 * Now the shrinker is fully set up, take the first reference to it to
	 * indicate that lookup operations are now allowed to use it via
	 * shrinker_try_get().
	 */
	refcount_set(&shrinker->refcount, 1);
}
EXPORT_SYMBOL_GPL(shrinker_register);

static void shrinker_free_rcu_cb(struct rcu_head *head)
{
	struct shrinker *shrinker = container_of(head, struct shrinker, rcu);

	kfree(shrinker->nr_deferred);
	kfree(shrinker);
}

void shrinker_free(struct shrinker *shrinker)
{
	struct dentry *debugfs_entry = NULL;
	int debugfs_id;

	if (!shrinker)
		return;

	if (shrinker->flags & SHRINKER_REGISTERED) {
		/* drop the initial refcount */
		shrinker_put(shrinker);
		/*
		 * Wait for all lookups of the shrinker to complete, after that,
		 * no shrinker is running or will run again, then we can safely
		 * free it asynchronously via RCU and safely free the structure
		 * where the shrinker is located, such as super_block etc.
		 */
		wait_for_completion(&shrinker->done);
	}

	mutex_lock(&shrinker_mutex);
	if (shrinker->flags & SHRINKER_REGISTERED) {
		/*
		 * Now we can safely remove it from the shrinker_list and then
		 * free it.
		 */
		list_del_rcu(&shrinker->list);
		debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id);
		shrinker->flags &= ~SHRINKER_REGISTERED;
	}

	shrinker_debugfs_name_free(shrinker);

	if (shrinker->flags & SHRINKER_MEMCG_AWARE)
		shrinker_memcg_remove(shrinker);
	mutex_unlock(&shrinker_mutex);

	if (debugfs_entry)
		shrinker_debugfs_remove(debugfs_entry, debugfs_id);

	call_rcu(&shrinker->rcu, shrinker_free_rcu_cb);
}
EXPORT_SYMBOL_GPL(shrinker_free);
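/*
 * Callback contract sketch for the shrinkers managed by this file
 * (hypothetical "foo" cache, shown for illustration only):
 *
 *	static unsigned long foo_count_objects(struct shrinker *s,
 *					       struct shrink_control *sc)
 *	{
 *		unsigned long n = foo_nr_cached(s->private_data, sc->nid);
 *
 *		return n ? n : SHRINK_EMPTY;
 *	}
 *
 *	static unsigned long foo_scan_objects(struct shrinker *s,
 *					      struct shrink_control *sc)
 *	{
 *		if (!foo_trylock(s->private_data))
 *			return SHRINK_STOP;
 *		return foo_reclaim(s->private_data, sc->nr_to_scan);
 *	}
 *
 * ->count_objects() returning SHRINK_EMPTY feeds the bit-clearing logic
 * in shrink_slab_memcg(), and SHRINK_STOP from ->scan_objects() ends the
 * batching loop in do_shrink_slab() early.
 */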