1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/memcontrol.h> 3 #include <linux/rwsem.h> 4 #include <linux/shrinker.h> 5 #include <linux/rculist.h> 6 #include <trace/events/vmscan.h> 7 8 #include "internal.h" 9 10 LIST_HEAD(shrinker_list); 11 DEFINE_MUTEX(shrinker_mutex); 12 13 #ifdef CONFIG_MEMCG 14 static int shrinker_nr_max; 15 16 static inline int shrinker_unit_size(int nr_items) 17 { 18 return (DIV_ROUND_UP(nr_items, SHRINKER_UNIT_BITS) * sizeof(struct shrinker_info_unit *)); 19 } 20 21 static inline void shrinker_unit_free(struct shrinker_info *info, int start) 22 { 23 struct shrinker_info_unit **unit; 24 int nr, i; 25 26 if (!info) 27 return; 28 29 unit = info->unit; 30 nr = DIV_ROUND_UP(info->map_nr_max, SHRINKER_UNIT_BITS); 31 32 for (i = start; i < nr; i++) { 33 if (!unit[i]) 34 break; 35 36 kfree(unit[i]); 37 unit[i] = NULL; 38 } 39 } 40 41 static inline int shrinker_unit_alloc(struct shrinker_info *new, 42 struct shrinker_info *old, int nid) 43 { 44 struct shrinker_info_unit *unit; 45 int nr = DIV_ROUND_UP(new->map_nr_max, SHRINKER_UNIT_BITS); 46 int start = old ? DIV_ROUND_UP(old->map_nr_max, SHRINKER_UNIT_BITS) : 0; 47 int i; 48 49 for (i = start; i < nr; i++) { 50 unit = kzalloc_node(sizeof(*unit), GFP_KERNEL, nid); 51 if (!unit) { 52 shrinker_unit_free(new, start); 53 return -ENOMEM; 54 } 55 56 new->unit[i] = unit; 57 } 58 59 return 0; 60 } 61 62 void free_shrinker_info(struct mem_cgroup *memcg) 63 { 64 struct mem_cgroup_per_node *pn; 65 struct shrinker_info *info; 66 int nid; 67 68 for_each_node(nid) { 69 pn = memcg->nodeinfo[nid]; 70 info = rcu_dereference_protected(pn->shrinker_info, true); 71 shrinker_unit_free(info, 0); 72 kvfree(info); 73 rcu_assign_pointer(pn->shrinker_info, NULL); 74 } 75 } 76 77 int alloc_shrinker_info(struct mem_cgroup *memcg) 78 { 79 struct shrinker_info *info; 80 int nid, ret = 0; 81 int array_size = 0; 82 83 mutex_lock(&shrinker_mutex); 84 array_size = shrinker_unit_size(shrinker_nr_max); 85 for_each_node(nid) { 86 info = kvzalloc_node(sizeof(*info) + array_size, GFP_KERNEL, nid); 87 if (!info) 88 goto err; 89 info->map_nr_max = shrinker_nr_max; 90 if (shrinker_unit_alloc(info, NULL, nid)) 91 goto err; 92 rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info); 93 } 94 mutex_unlock(&shrinker_mutex); 95 96 return ret; 97 98 err: 99 mutex_unlock(&shrinker_mutex); 100 free_shrinker_info(memcg); 101 return -ENOMEM; 102 } 103 104 static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg, 105 int nid) 106 { 107 return rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_info, 108 lockdep_is_held(&shrinker_mutex)); 109 } 110 111 static int expand_one_shrinker_info(struct mem_cgroup *memcg, int new_size, 112 int old_size, int new_nr_max) 113 { 114 struct shrinker_info *new, *old; 115 struct mem_cgroup_per_node *pn; 116 int nid; 117 118 for_each_node(nid) { 119 pn = memcg->nodeinfo[nid]; 120 old = shrinker_info_protected(memcg, nid); 121 /* Not yet online memcg */ 122 if (!old) 123 return 0; 124 125 /* Already expanded this shrinker_info */ 126 if (new_nr_max <= old->map_nr_max) 127 continue; 128 129 new = kvzalloc_node(sizeof(*new) + new_size, GFP_KERNEL, nid); 130 if (!new) 131 return -ENOMEM; 132 133 new->map_nr_max = new_nr_max; 134 135 memcpy(new->unit, old->unit, old_size); 136 if (shrinker_unit_alloc(new, old, nid)) { 137 kvfree(new); 138 return -ENOMEM; 139 } 140 141 rcu_assign_pointer(pn->shrinker_info, new); 142 kvfree_rcu(old, rcu); 143 } 144 145 return 0; 146 } 147 148 static int expand_shrinker_info(int new_id) 149 { 150 int ret = 0; 151 int new_nr_max = round_up(new_id + 1, SHRINKER_UNIT_BITS); 152 int new_size, old_size = 0; 153 struct mem_cgroup *memcg; 154 155 if (!root_mem_cgroup) 156 goto out; 157 158 lockdep_assert_held(&shrinker_mutex); 159 160 new_size = shrinker_unit_size(new_nr_max); 161 old_size = shrinker_unit_size(shrinker_nr_max); 162 163 memcg = mem_cgroup_iter(NULL, NULL, NULL); 164 do { 165 ret = expand_one_shrinker_info(memcg, new_size, old_size, 166 new_nr_max); 167 if (ret) { 168 mem_cgroup_iter_break(NULL, memcg); 169 goto out; 170 } 171 } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL); 172 out: 173 if (!ret) 174 shrinker_nr_max = new_nr_max; 175 176 return ret; 177 } 178 179 static inline int shrinker_id_to_index(int shrinker_id) 180 { 181 return shrinker_id / SHRINKER_UNIT_BITS; 182 } 183 184 static inline int shrinker_id_to_offset(int shrinker_id) 185 { 186 return shrinker_id % SHRINKER_UNIT_BITS; 187 } 188 189 static inline int calc_shrinker_id(int index, int offset) 190 { 191 return index * SHRINKER_UNIT_BITS + offset; 192 } 193 194 void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id) 195 { 196 if (shrinker_id >= 0 && memcg && !mem_cgroup_is_root(memcg)) { 197 struct shrinker_info *info; 198 struct shrinker_info_unit *unit; 199 200 rcu_read_lock(); 201 info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info); 202 unit = info->unit[shrinker_id_to_index(shrinker_id)]; 203 if (!WARN_ON_ONCE(shrinker_id >= info->map_nr_max)) { 204 /* Pairs with smp mb in shrink_slab() */ 205 smp_mb__before_atomic(); 206 set_bit(shrinker_id_to_offset(shrinker_id), unit->map); 207 } 208 rcu_read_unlock(); 209 } 210 } 211 212 static DEFINE_IDR(shrinker_idr); 213 214 static int shrinker_memcg_alloc(struct shrinker *shrinker) 215 { 216 int id, ret = -ENOMEM; 217 218 if (mem_cgroup_disabled()) 219 return -ENOSYS; 220 221 mutex_lock(&shrinker_mutex); 222 id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL); 223 if (id < 0) 224 goto unlock; 225 226 if (id >= shrinker_nr_max) { 227 if (expand_shrinker_info(id)) { 228 idr_remove(&shrinker_idr, id); 229 goto unlock; 230 } 231 } 232 shrinker->id = id; 233 ret = 0; 234 unlock: 235 mutex_unlock(&shrinker_mutex); 236 return ret; 237 } 238 239 static void shrinker_memcg_remove(struct shrinker *shrinker) 240 { 241 int id = shrinker->id; 242 243 BUG_ON(id < 0); 244 245 lockdep_assert_held(&shrinker_mutex); 246 247 idr_remove(&shrinker_idr, id); 248 } 249 250 static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker, 251 struct mem_cgroup *memcg) 252 { 253 struct shrinker_info *info; 254 struct shrinker_info_unit *unit; 255 long nr_deferred; 256 257 rcu_read_lock(); 258 info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info); 259 unit = info->unit[shrinker_id_to_index(shrinker->id)]; 260 nr_deferred = atomic_long_xchg(&unit->nr_deferred[shrinker_id_to_offset(shrinker->id)], 0); 261 rcu_read_unlock(); 262 263 return nr_deferred; 264 } 265 266 static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker, 267 struct mem_cgroup *memcg) 268 { 269 struct shrinker_info *info; 270 struct shrinker_info_unit *unit; 271 long nr_deferred; 272 273 rcu_read_lock(); 274 info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info); 275 unit = info->unit[shrinker_id_to_index(shrinker->id)]; 276 nr_deferred = 277 atomic_long_add_return(nr, &unit->nr_deferred[shrinker_id_to_offset(shrinker->id)]); 278 rcu_read_unlock(); 279 280 return nr_deferred; 281 } 282 283 void reparent_shrinker_deferred(struct mem_cgroup *memcg) 284 { 285 int nid, index, offset; 286 long nr; 287 struct mem_cgroup *parent; 288 struct shrinker_info *child_info, *parent_info; 289 struct shrinker_info_unit *child_unit, *parent_unit; 290 291 parent = parent_mem_cgroup(memcg); 292 if (!parent) 293 parent = root_mem_cgroup; 294 295 /* Prevent from concurrent shrinker_info expand */ 296 mutex_lock(&shrinker_mutex); 297 for_each_node(nid) { 298 child_info = shrinker_info_protected(memcg, nid); 299 parent_info = shrinker_info_protected(parent, nid); 300 for (index = 0; index < shrinker_id_to_index(child_info->map_nr_max); index++) { 301 child_unit = child_info->unit[index]; 302 parent_unit = parent_info->unit[index]; 303 for (offset = 0; offset < SHRINKER_UNIT_BITS; offset++) { 304 nr = atomic_long_read(&child_unit->nr_deferred[offset]); 305 atomic_long_add(nr, &parent_unit->nr_deferred[offset]); 306 } 307 } 308 } 309 mutex_unlock(&shrinker_mutex); 310 } 311 #else 312 static int shrinker_memcg_alloc(struct shrinker *shrinker) 313 { 314 return -ENOSYS; 315 } 316 317 static void shrinker_memcg_remove(struct shrinker *shrinker) 318 { 319 } 320 321 static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker, 322 struct mem_cgroup *memcg) 323 { 324 return 0; 325 } 326 327 static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker, 328 struct mem_cgroup *memcg) 329 { 330 return 0; 331 } 332 #endif /* CONFIG_MEMCG */ 333 334 static long xchg_nr_deferred(struct shrinker *shrinker, 335 struct shrink_control *sc) 336 { 337 int nid = sc->nid; 338 339 if (!(shrinker->flags & SHRINKER_NUMA_AWARE)) 340 nid = 0; 341 342 if (sc->memcg && 343 (shrinker->flags & SHRINKER_MEMCG_AWARE)) 344 return xchg_nr_deferred_memcg(nid, shrinker, 345 sc->memcg); 346 347 return atomic_long_xchg(&shrinker->nr_deferred[nid], 0); 348 } 349 350 351 static long add_nr_deferred(long nr, struct shrinker *shrinker, 352 struct shrink_control *sc) 353 { 354 int nid = sc->nid; 355 356 if (!(shrinker->flags & SHRINKER_NUMA_AWARE)) 357 nid = 0; 358 359 if (sc->memcg && 360 (shrinker->flags & SHRINKER_MEMCG_AWARE)) 361 return add_nr_deferred_memcg(nr, nid, shrinker, 362 sc->memcg); 363 364 return atomic_long_add_return(nr, &shrinker->nr_deferred[nid]); 365 } 366 367 #define SHRINK_BATCH 128 368 369 static unsigned long do_shrink_slab(struct shrink_control *shrinkctl, 370 struct shrinker *shrinker, int priority) 371 { 372 unsigned long freed = 0; 373 unsigned long long delta; 374 long total_scan; 375 long freeable; 376 long nr; 377 long new_nr; 378 long batch_size = shrinker->batch ? shrinker->batch 379 : SHRINK_BATCH; 380 long scanned = 0, next_deferred; 381 382 freeable = shrinker->count_objects(shrinker, shrinkctl); 383 if (freeable == 0 || freeable == SHRINK_EMPTY) 384 return freeable; 385 386 /* 387 * copy the current shrinker scan count into a local variable 388 * and zero it so that other concurrent shrinker invocations 389 * don't also do this scanning work. 390 */ 391 nr = xchg_nr_deferred(shrinker, shrinkctl); 392 393 if (shrinker->seeks) { 394 delta = freeable >> priority; 395 delta *= 4; 396 do_div(delta, shrinker->seeks); 397 } else { 398 /* 399 * These objects don't require any IO to create. Trim 400 * them aggressively under memory pressure to keep 401 * them from causing refetches in the IO caches. 402 */ 403 delta = freeable / 2; 404 } 405 406 total_scan = nr >> priority; 407 total_scan += delta; 408 total_scan = min(total_scan, (2 * freeable)); 409 410 trace_mm_shrink_slab_start(shrinker, shrinkctl, nr, 411 freeable, delta, total_scan, priority); 412 413 /* 414 * Normally, we should not scan less than batch_size objects in one 415 * pass to avoid too frequent shrinker calls, but if the slab has less 416 * than batch_size objects in total and we are really tight on memory, 417 * we will try to reclaim all available objects, otherwise we can end 418 * up failing allocations although there are plenty of reclaimable 419 * objects spread over several slabs with usage less than the 420 * batch_size. 421 * 422 * We detect the "tight on memory" situations by looking at the total 423 * number of objects we want to scan (total_scan). If it is greater 424 * than the total number of objects on slab (freeable), we must be 425 * scanning at high prio and therefore should try to reclaim as much as 426 * possible. 427 */ 428 while (total_scan >= batch_size || 429 total_scan >= freeable) { 430 unsigned long ret; 431 unsigned long nr_to_scan = min(batch_size, total_scan); 432 433 shrinkctl->nr_to_scan = nr_to_scan; 434 shrinkctl->nr_scanned = nr_to_scan; 435 ret = shrinker->scan_objects(shrinker, shrinkctl); 436 if (ret == SHRINK_STOP) 437 break; 438 freed += ret; 439 440 count_vm_events(SLABS_SCANNED, shrinkctl->nr_scanned); 441 total_scan -= shrinkctl->nr_scanned; 442 scanned += shrinkctl->nr_scanned; 443 444 cond_resched(); 445 } 446 447 /* 448 * The deferred work is increased by any new work (delta) that wasn't 449 * done, decreased by old deferred work that was done now. 450 * 451 * And it is capped to two times of the freeable items. 452 */ 453 next_deferred = max_t(long, (nr + delta - scanned), 0); 454 next_deferred = min(next_deferred, (2 * freeable)); 455 456 /* 457 * move the unused scan count back into the shrinker in a 458 * manner that handles concurrent updates. 459 */ 460 new_nr = add_nr_deferred(next_deferred, shrinker, shrinkctl); 461 462 trace_mm_shrink_slab_end(shrinker, shrinkctl->nid, freed, nr, new_nr, total_scan); 463 return freed; 464 } 465 466 #ifdef CONFIG_MEMCG 467 static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, 468 struct mem_cgroup *memcg, int priority) 469 { 470 struct shrinker_info *info; 471 unsigned long ret, freed = 0; 472 int offset, index = 0; 473 474 if (!mem_cgroup_online(memcg)) 475 return 0; 476 477 /* 478 * lockless algorithm of memcg shrink. 479 * 480 * The shrinker_info may be freed asynchronously via RCU in the 481 * expand_one_shrinker_info(), so the rcu_read_lock() needs to be used 482 * to ensure the existence of the shrinker_info. 483 * 484 * The shrinker_info_unit is never freed unless its corresponding memcg 485 * is destroyed. Here we already hold the refcount of memcg, so the 486 * memcg will not be destroyed, and of course shrinker_info_unit will 487 * not be freed. 488 * 489 * So in the memcg shrink: 490 * step 1: use rcu_read_lock() to guarantee existence of the 491 * shrinker_info. 492 * step 2: after getting shrinker_info_unit we can safely release the 493 * RCU lock. 494 * step 3: traverse the bitmap and calculate shrinker_id 495 * step 4: use rcu_read_lock() to guarantee existence of the shrinker. 496 * step 5: use shrinker_id to find the shrinker, then use 497 * shrinker_try_get() to guarantee existence of the shrinker, 498 * then we can release the RCU lock to do do_shrink_slab() that 499 * may sleep. 500 * step 6: do shrinker_put() paired with step 5 to put the refcount, 501 * if the refcount reaches 0, then wake up the waiter in 502 * shrinker_free() by calling complete(). 503 * Note: here is different from the global shrink, we don't 504 * need to acquire the RCU lock to guarantee existence of 505 * the shrinker, because we don't need to use this 506 * shrinker to traverse the next shrinker in the bitmap. 507 * step 7: we have already exited the read-side of rcu critical section 508 * before calling do_shrink_slab(), the shrinker_info may be 509 * released in expand_one_shrinker_info(), so go back to step 1 510 * to reacquire the shrinker_info. 511 */ 512 again: 513 rcu_read_lock(); 514 info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info); 515 if (unlikely(!info)) 516 goto unlock; 517 518 if (index < shrinker_id_to_index(info->map_nr_max)) { 519 struct shrinker_info_unit *unit; 520 521 unit = info->unit[index]; 522 523 rcu_read_unlock(); 524 525 for_each_set_bit(offset, unit->map, SHRINKER_UNIT_BITS) { 526 struct shrink_control sc = { 527 .gfp_mask = gfp_mask, 528 .nid = nid, 529 .memcg = memcg, 530 }; 531 struct shrinker *shrinker; 532 int shrinker_id = calc_shrinker_id(index, offset); 533 534 rcu_read_lock(); 535 shrinker = idr_find(&shrinker_idr, shrinker_id); 536 if (unlikely(!shrinker || !shrinker_try_get(shrinker))) { 537 clear_bit(offset, unit->map); 538 rcu_read_unlock(); 539 continue; 540 } 541 rcu_read_unlock(); 542 543 /* Call non-slab shrinkers even though kmem is disabled */ 544 if (!memcg_kmem_online() && 545 !(shrinker->flags & SHRINKER_NONSLAB)) 546 continue; 547 548 ret = do_shrink_slab(&sc, shrinker, priority); 549 if (ret == SHRINK_EMPTY) { 550 clear_bit(offset, unit->map); 551 /* 552 * After the shrinker reported that it had no objects to 553 * free, but before we cleared the corresponding bit in 554 * the memcg shrinker map, a new object might have been 555 * added. To make sure, we have the bit set in this 556 * case, we invoke the shrinker one more time and reset 557 * the bit if it reports that it is not empty anymore. 558 * The memory barrier here pairs with the barrier in 559 * set_shrinker_bit(): 560 * 561 * list_lru_add() shrink_slab_memcg() 562 * list_add_tail() clear_bit() 563 * <MB> <MB> 564 * set_bit() do_shrink_slab() 565 */ 566 smp_mb__after_atomic(); 567 ret = do_shrink_slab(&sc, shrinker, priority); 568 if (ret == SHRINK_EMPTY) 569 ret = 0; 570 else 571 set_shrinker_bit(memcg, nid, shrinker_id); 572 } 573 freed += ret; 574 shrinker_put(shrinker); 575 } 576 577 index++; 578 goto again; 579 } 580 unlock: 581 rcu_read_unlock(); 582 return freed; 583 } 584 #else /* !CONFIG_MEMCG */ 585 static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, 586 struct mem_cgroup *memcg, int priority) 587 { 588 return 0; 589 } 590 #endif /* CONFIG_MEMCG */ 591 592 /** 593 * shrink_slab - shrink slab caches 594 * @gfp_mask: allocation context 595 * @nid: node whose slab caches to target 596 * @memcg: memory cgroup whose slab caches to target 597 * @priority: the reclaim priority 598 * 599 * Call the shrink functions to age shrinkable caches. 600 * 601 * @nid is passed along to shrinkers with SHRINKER_NUMA_AWARE set, 602 * unaware shrinkers will receive a node id of 0 instead. 603 * 604 * @memcg specifies the memory cgroup to target. Unaware shrinkers 605 * are called only if it is the root cgroup. 606 * 607 * @priority is sc->priority, we take the number of objects and >> by priority 608 * in order to get the scan target. 609 * 610 * Returns the number of reclaimed slab objects. 611 */ 612 unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, 613 int priority) 614 { 615 unsigned long ret, freed = 0; 616 struct shrinker *shrinker; 617 618 /* 619 * The root memcg might be allocated even though memcg is disabled 620 * via "cgroup_disable=memory" boot parameter. This could make 621 * mem_cgroup_is_root() return false, then just run memcg slab 622 * shrink, but skip global shrink. This may result in premature 623 * oom. 624 */ 625 if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg)) 626 return shrink_slab_memcg(gfp_mask, nid, memcg, priority); 627 628 /* 629 * lockless algorithm of global shrink. 630 * 631 * In the unregistration setp, the shrinker will be freed asynchronously 632 * via RCU after its refcount reaches 0. So both rcu_read_lock() and 633 * shrinker_try_get() can be used to ensure the existence of the shrinker. 634 * 635 * So in the global shrink: 636 * step 1: use rcu_read_lock() to guarantee existence of the shrinker 637 * and the validity of the shrinker_list walk. 638 * step 2: use shrinker_try_get() to try get the refcount, if successful, 639 * then the existence of the shrinker can also be guaranteed, 640 * so we can release the RCU lock to do do_shrink_slab() that 641 * may sleep. 642 * step 3: *MUST* to reacquire the RCU lock before calling shrinker_put(), 643 * which ensures that neither this shrinker nor the next shrinker 644 * will be freed in the next traversal operation. 645 * step 4: do shrinker_put() paired with step 2 to put the refcount, 646 * if the refcount reaches 0, then wake up the waiter in 647 * shrinker_free() by calling complete(). 648 */ 649 rcu_read_lock(); 650 list_for_each_entry_rcu(shrinker, &shrinker_list, list) { 651 struct shrink_control sc = { 652 .gfp_mask = gfp_mask, 653 .nid = nid, 654 .memcg = memcg, 655 }; 656 657 if (!shrinker_try_get(shrinker)) 658 continue; 659 660 rcu_read_unlock(); 661 662 ret = do_shrink_slab(&sc, shrinker, priority); 663 if (ret == SHRINK_EMPTY) 664 ret = 0; 665 freed += ret; 666 667 rcu_read_lock(); 668 shrinker_put(shrinker); 669 } 670 671 rcu_read_unlock(); 672 cond_resched(); 673 return freed; 674 } 675 676 struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...) 677 { 678 struct shrinker *shrinker; 679 unsigned int size; 680 va_list ap; 681 int err; 682 683 shrinker = kzalloc(sizeof(struct shrinker), GFP_KERNEL); 684 if (!shrinker) 685 return NULL; 686 687 va_start(ap, fmt); 688 err = shrinker_debugfs_name_alloc(shrinker, fmt, ap); 689 va_end(ap); 690 if (err) 691 goto err_name; 692 693 shrinker->flags = flags | SHRINKER_ALLOCATED; 694 shrinker->seeks = DEFAULT_SEEKS; 695 696 if (flags & SHRINKER_MEMCG_AWARE) { 697 err = shrinker_memcg_alloc(shrinker); 698 if (err == -ENOSYS) { 699 /* Memcg is not supported, fallback to non-memcg-aware shrinker. */ 700 shrinker->flags &= ~SHRINKER_MEMCG_AWARE; 701 goto non_memcg; 702 } 703 704 if (err) 705 goto err_flags; 706 707 return shrinker; 708 } 709 710 non_memcg: 711 /* 712 * The nr_deferred is available on per memcg level for memcg aware 713 * shrinkers, so only allocate nr_deferred in the following cases: 714 * - non-memcg-aware shrinkers 715 * - !CONFIG_MEMCG 716 * - memcg is disabled by kernel command line 717 */ 718 size = sizeof(*shrinker->nr_deferred); 719 if (flags & SHRINKER_NUMA_AWARE) 720 size *= nr_node_ids; 721 722 shrinker->nr_deferred = kzalloc(size, GFP_KERNEL); 723 if (!shrinker->nr_deferred) 724 goto err_flags; 725 726 return shrinker; 727 728 err_flags: 729 shrinker_debugfs_name_free(shrinker); 730 err_name: 731 kfree(shrinker); 732 return NULL; 733 } 734 EXPORT_SYMBOL_GPL(shrinker_alloc); 735 736 void shrinker_register(struct shrinker *shrinker) 737 { 738 if (unlikely(!(shrinker->flags & SHRINKER_ALLOCATED))) { 739 pr_warn("Must use shrinker_alloc() to dynamically allocate the shrinker"); 740 return; 741 } 742 743 mutex_lock(&shrinker_mutex); 744 list_add_tail_rcu(&shrinker->list, &shrinker_list); 745 shrinker->flags |= SHRINKER_REGISTERED; 746 shrinker_debugfs_add(shrinker); 747 mutex_unlock(&shrinker_mutex); 748 749 init_completion(&shrinker->done); 750 /* 751 * Now the shrinker is fully set up, take the first reference to it to 752 * indicate that lookup operations are now allowed to use it via 753 * shrinker_try_get(). 754 */ 755 refcount_set(&shrinker->refcount, 1); 756 } 757 EXPORT_SYMBOL_GPL(shrinker_register); 758 759 static void shrinker_free_rcu_cb(struct rcu_head *head) 760 { 761 struct shrinker *shrinker = container_of(head, struct shrinker, rcu); 762 763 kfree(shrinker->nr_deferred); 764 kfree(shrinker); 765 } 766 767 void shrinker_free(struct shrinker *shrinker) 768 { 769 struct dentry *debugfs_entry = NULL; 770 int debugfs_id; 771 772 if (!shrinker) 773 return; 774 775 if (shrinker->flags & SHRINKER_REGISTERED) { 776 /* drop the initial refcount */ 777 shrinker_put(shrinker); 778 /* 779 * Wait for all lookups of the shrinker to complete, after that, 780 * no shrinker is running or will run again, then we can safely 781 * free it asynchronously via RCU and safely free the structure 782 * where the shrinker is located, such as super_block etc. 783 */ 784 wait_for_completion(&shrinker->done); 785 } 786 787 mutex_lock(&shrinker_mutex); 788 if (shrinker->flags & SHRINKER_REGISTERED) { 789 /* 790 * Now we can safely remove it from the shrinker_list and then 791 * free it. 792 */ 793 list_del_rcu(&shrinker->list); 794 debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id); 795 shrinker->flags &= ~SHRINKER_REGISTERED; 796 } 797 798 shrinker_debugfs_name_free(shrinker); 799 800 if (shrinker->flags & SHRINKER_MEMCG_AWARE) 801 shrinker_memcg_remove(shrinker); 802 mutex_unlock(&shrinker_mutex); 803 804 if (debugfs_entry) 805 shrinker_debugfs_remove(debugfs_entry, debugfs_id); 806 807 call_rcu(&shrinker->rcu, shrinker_free_rcu_cb); 808 } 809 EXPORT_SYMBOL_GPL(shrinker_free); 810