Lines Matching +full:page +full:- +full:size

1 // SPDX-License-Identifier: GPL-2.0-or-later
12 * Released under the terms of 3-clause BSD License
21 * pool->migrate_lock
22 * class->lock
23 * zspage->lock
59 * span more than 1 page, which avoids the complex case of mapping 2 pages simply
87 #define _PFN_BITS (MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT)
93 * header keeps handle which is 4byte-aligned address so we
101 #define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS)
102 #define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
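
The macros above split one unsigned long object value into a PFN part and an object-index part. The following is a minimal userspace sketch of that bit budget; BITS_PER_LONG, PAGE_SHIFT and MAX_POSSIBLE_PHYSMEM_BITS below are assumed example values, not what any particular architecture actually defines.

    /*
     * Sketch of the object-location bit budget. The constants are
     * illustrative assumptions, not any particular architecture's values.
     */
    #include <stdio.h>

    #define BITS_PER_LONG             64
    #define PAGE_SHIFT                12   /* 4K pages (assumed) */
    #define MAX_POSSIBLE_PHYSMEM_BITS 46   /* assumed */

    #define _PFN_BITS      (MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT)
    #define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS)
    #define OBJ_INDEX_MASK ((1UL << OBJ_INDEX_BITS) - 1)

    int main(void)
    {
        /* upper bits identify the page, lower bits the slot inside it */
        printf("PFN bits %d, obj index bits %d, max index %lu\n",
               _PFN_BITS, OBJ_INDEX_BITS, OBJ_INDEX_MASK);
        return 0;
    }
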
118 * On systems with 4K page size, this gives 255 size classes! There is a
119 * trade-off here:
120 * - Large number of size classes is potentially wasteful as free pages are
122 * - Small number of size classes causes large internal fragmentation
123 * - Probably it's better to use specific size classes (empirically
131 #define ZS_SIZE_CLASSES (DIV_ROUND_UP(ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE, \
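
The ZS_SIZE_CLASSES macro is cut off in the match above. A rough worked model of the class count it produces on 4K pages follows; the 32-byte minimum and 16-byte step are assumed values, and the trailing + 1 is inferred from the 255-class figure quoted at line 118.

    /* Rough model of the class count on 4K pages; constants are assumed. */
    #include <stdio.h>

    #define PAGE_SIZE           4096
    #define ZS_MIN_ALLOC_SIZE   32   /* assumed */
    #define ZS_MAX_ALLOC_SIZE   PAGE_SIZE
    #define ZS_SIZE_CLASS_DELTA 16   /* assumed: PAGE_SIZE >> 8 */

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
        /* one class per 16-byte step, plus one for the minimum size */
        int classes = DIV_ROUND_UP(ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE,
                                   ZS_SIZE_CLASS_DELTA) + 1;

        printf("%d size classes\n", classes);   /* 255, as noted above */
        return 0;
    }
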
136 * of ->inuse objects to all objects that page can store). For example,
140 * difference between the least busy page in the group (minimum permitted
141 * number of ->inuse objects) and the most busy page (maximum permitted
142 * number of ->inuse objects) at a reasonable value.
174 * Size of objects stored in this class. Must be multiple
177 int size;
188 * For every zspage, zspage->freeobj gives head of this list.
196 * It's valid for non-allocated object
226 /* protect page/zspage migration */
248 struct page *page = alloc_page(gfp);
250 return page_zpdesc(page);
255 struct page *page = zpdesc_page(zpdesc);
257 __free_page(page);
285 zspage->huge = 1;
290 return zspage->huge;
313 name = kasprintf(GFP_KERNEL, "zs_handle-%s", pool->name);
315 return -ENOMEM;
316 pool->handle_cachep = kmem_cache_create(name, ZS_HANDLE_SIZE,
319 if (!pool->handle_cachep)
320 return -EINVAL;
322 name = kasprintf(GFP_KERNEL, "zspage-%s", pool->name);
324 return -ENOMEM;
325 pool->zspage_cachep = kmem_cache_create(name, sizeof(struct zspage),
328 if (!pool->zspage_cachep) {
329 kmem_cache_destroy(pool->handle_cachep);
330 pool->handle_cachep = NULL;
331 return -EINVAL;
339 kmem_cache_destroy(pool->handle_cachep);
340 kmem_cache_destroy(pool->zspage_cachep);
345 return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
351 kmem_cache_free(pool->handle_cachep, (void *)handle);
356 return kmem_cache_zalloc(pool->zspage_cachep,
362 kmem_cache_free(pool->zspage_cachep, zspage);
365 /* class->lock(which owns the handle) synchronizes races */
390 static int zs_zpool_malloc(void *pool, size_t size, gfp_t gfp,
393 *handle = zs_malloc(pool, size, gfp);
447 MODULE_ALIAS("zpool-zsmalloc");
450 /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
460 /* Protected by class->lock */
463 return zspage->inuse;
468 zspage->inuse += val;
473 struct zpdesc *first_zpdesc = zspage->first_zpdesc;
484 return zpdesc->first_obj_offset & FIRST_OBJ_PAGE_TYPE_MASK;
493 zpdesc->first_obj_offset &= ~FIRST_OBJ_PAGE_TYPE_MASK;
494 zpdesc->first_obj_offset |= offset & FIRST_OBJ_PAGE_TYPE_MASK;
499 return zspage->freeobj;
504 zspage->freeobj = obj;
510 return pool->size_class[zspage->class];
514 * zsmalloc divides the pool into various size classes where each
517 * classes depending on its size. This function returns index of the
518 * size class which has chunk size big enough to hold the given size.
520 static int get_size_class_index(int size)
524 if (likely(size > ZS_MIN_ALLOC_SIZE))
525 idx = DIV_ROUND_UP(size - ZS_MIN_ALLOC_SIZE,
528 return min_t(int, ZS_SIZE_CLASSES - 1, idx);
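
get_size_class_index() rounds a request up to the next class step and clamps it to the largest class. Here is a worked userspace sketch using the same assumed constants as the sketch above; the 100-byte request is just an example.

    /* Map a request size to its class; constants assumed as above. */
    #include <stdio.h>

    #define ZS_MIN_ALLOC_SIZE   32
    #define ZS_SIZE_CLASS_DELTA 16
    #define ZS_SIZE_CLASSES     255
    #define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))

    static int get_size_class_index(int size)
    {
        int idx = 0;

        if (size > ZS_MIN_ALLOC_SIZE)
            idx = DIV_ROUND_UP(size - ZS_MIN_ALLOC_SIZE,
                               ZS_SIZE_CLASS_DELTA);

        /* clamp to the largest class */
        return idx < ZS_SIZE_CLASSES - 1 ? idx : ZS_SIZE_CLASSES - 1;
    }

    int main(void)
    {
        int idx = get_size_class_index(100);

        /* a 100-byte request lands in the 112-byte class (index 5) */
        printf("index %d, chunk size %d\n", idx,
               ZS_MIN_ALLOC_SIZE + idx * ZS_SIZE_CLASS_DELTA);
        return 0;
    }
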
534 class->stats.objs[type] += cnt;
540 class->stats.objs[type] -= cnt;
545 return class->stats.objs[type];
570 struct zs_pool *pool = s->private;
579 "class", "size", "10%", "20%", "30%", "40%",
586 class = pool->size_class[i];
588 if (class->index != i)
591 spin_lock(&class->lock);
593 seq_printf(s, " %5u %5u ", i, class->size);
602 spin_unlock(&class->lock);
604 objs_per_zspage = class->objs_per_zspage;
606 class->pages_per_zspage;
610 class->pages_per_zspage, freeable);
639 pool->stat_dentry = debugfs_create_dir(name, zs_stat_root);
641 debugfs_create_file("classes", S_IFREG | 0444, pool->stat_dentry, pool,
647 debugfs_remove_recursive(pool->stat_dentry);
670 * For each size class, zspages are divided into different groups
672 * status of the given page.
679 objs_per_zspage = class->objs_per_zspage;
688 * Take integer division into consideration: a page with one inuse
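
The fullness group of a zspage is the percentage of its object slots in use, bucketed in roughly 10% steps; the integer-division note at line 688 means a zspage with even one in-use object must never round down to the empty bucket. A minimal userspace model follows; the bucket numbering is an assumption loosely mirroring the ZS_INUSE_RATIO_* groups seen elsewhere in this listing.

    /* Userspace model of fullness bucketing; numbering is assumed. */
    #include <stdio.h>

    /* 0 = empty, 1..10 = partially used in ~10% steps, 11 = full */
    static int fullness_bucket(int inuse, int objs_per_zspage)
    {
        if (inuse == 0)
            return 0;
        if (inuse == objs_per_zspage)
            return 11;
        /*
         * 100 * 1 / 127 is 0 with integer division, so add one:
         * a zspage holding any object must not look empty.
         */
        return (100 * inuse / objs_per_zspage) / 10 + 1;
    }

    int main(void)
    {
        printf("%d\n", fullness_bucket(1, 127));    /* 1, not 0 */
        printf("%d\n", fullness_bucket(64, 127));   /* 6 */
        printf("%d\n", fullness_bucket(127, 127));  /* 11 */
        return 0;
    }
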
696 * Each size class maintains various freelists and zspages are assigned
706 list_add(&zspage->list, &class->fullness_list[fullness]);
707 zspage->fullness = fullness;
716 int fullness = zspage->fullness;
718 VM_BUG_ON(list_empty(&class->fullness_list[fullness]));
720 list_del_init(&zspage->list);
725 * Each size class maintains zspages in different fullness groups depending
727 * objects, the fullness status of the page can change, for instance, from
729 * checks if such a status change has occurred for the given page and
730 * accordingly moves the page from the list of the old fullness group to that
738 if (newfg == zspage->fullness)
749 struct zspage *zspage = zpdesc->zspage;
751 BUG_ON(zspage->magic != ZSPAGE_MAGIC);
762 return zpdesc->next;
766 * obj_to_location - get (<zpdesc>, <obj_idx>) from encoded object value
784 * location_to_obj - get obj value encoded from (<zpdesc>, <obj_idx>)
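
obj_to_location() and location_to_obj() pack a page frame number and an object index into a single unsigned long, keeping the low OBJ_TAG_BITS free for tag bits. The following round-trip sketch assumes a 64-bit unsigned long, an OBJ_TAG_BITS of 1 and the same assumed index width as the earlier sketch.

    /* Round-trip sketch of the (pfn, obj_idx) <-> object-value encoding. */
    #include <assert.h>
    #include <stdio.h>

    #define OBJ_INDEX_BITS 30   /* assumed, matches the earlier sketch */
    #define OBJ_INDEX_MASK ((1UL << OBJ_INDEX_BITS) - 1)
    #define OBJ_TAG_BITS   1    /* low bit kept free for a tag (assumed) */

    static unsigned long location_to_obj(unsigned long pfn, unsigned int obj_idx)
    {
        unsigned long obj = (pfn << OBJ_INDEX_BITS) | (obj_idx & OBJ_INDEX_MASK);

        return obj << OBJ_TAG_BITS;   /* leave room for the tag bit */
    }

    static void obj_to_location(unsigned long obj, unsigned long *pfn,
                                unsigned int *obj_idx)
    {
        obj >>= OBJ_TAG_BITS;
        *pfn = obj >> OBJ_INDEX_BITS;
        *obj_idx = obj & OBJ_INDEX_MASK;
    }

    int main(void)
    {
        unsigned long pfn;
        unsigned int idx;

        obj_to_location(location_to_obj(0x12345, 7), &pfn, &idx);
        assert(pfn == 0x12345 && idx == 7);
        printf("pfn %#lx, obj_idx %u\n", pfn, idx);
        return 0;
    }
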
811 handle = zpdesc->handle;
825 struct page *page = zpdesc_page(zpdesc);
827 __ClearPageMovable(page);
828 ClearPagePrivate(page);
829 zpdesc->zspage = NULL;
830 zpdesc->next = NULL;
831 __ClearPageZsmalloc(page);
860 assert_spin_locked(&class->lock);
863 VM_BUG_ON(zspage->fullness != ZS_INUSE_RATIO_0);
878 class_stat_sub(class, ZS_OBJS_ALLOCATED, class->objs_per_zspage);
879 atomic_long_sub(class->pages_per_zspage, &pool->pages_allocated);
886 VM_BUG_ON(list_empty(&zspage->list));
890 * lock_page. The page locks trylock_zspage got will be released
919 while ((off += class->size) < PAGE_SIZE) {
920 link->next = freeobj++ << OBJ_TAG_BITS;
921 link += class->size / sizeof(*link);
926 * page, which must point to the first object on the next
927 * page (if present)
931 link->next = freeobj++ << OBJ_TAG_BITS;
937 link->next = -1UL << OBJ_TAG_BITS;
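
init_zspage() threads the freelist through the free objects themselves: the first word of each free slot holds the index of the next free slot, shifted to leave the tag bits clear, and the last slot holds -1 as an end marker. Below is a single-page userspace model of that chaining; the 112-byte object size is an assumption, and the real code additionally handles the last object of a page spilling onto the next page of the zspage (lines 926-931 above).

    /* Single-page model of threading the freelist through free slots. */
    #include <stdio.h>

    #define PAGE_SIZE    4096
    #define OBJ_TAG_BITS 1
    #define CLASS_SIZE   112   /* assumed object size for this class */

    union link_free {
        unsigned long next;     /* (index of next free slot) << OBJ_TAG_BITS */
        unsigned long handle;   /* reused once the slot is allocated */
    };

    static unsigned long page_mem[PAGE_SIZE / sizeof(unsigned long)];

    int main(void)
    {
        unsigned char *page = (unsigned char *)page_mem;
        union link_free *link = (union link_free *)page;
        unsigned long off, freeobj = 1;

        /* every slot but the last points at the slot that follows it */
        for (off = CLASS_SIZE; off + CLASS_SIZE <= PAGE_SIZE; off += CLASS_SIZE) {
            link->next = freeobj++ << OBJ_TAG_BITS;
            link = (union link_free *)(page + off);
        }
        link->next = -1UL << OBJ_TAG_BITS;   /* end-of-list marker */

        printf("%lu slots, head points at slot %lu\n", freeobj,
               ((union link_free *)page)->next >> OBJ_TAG_BITS);
        return 0;
    }
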
953 int nr_zpdescs = class->pages_per_zspage;
957 * 1. all pages are linked together using zpdesc->next
958 * 2. each sub-page points to zspage using zpdesc->zspage
965 zpdesc->zspage = zspage;
966 zpdesc->next = NULL;
968 zspage->first_zpdesc = zpdesc;
970 if (unlikely(class->objs_per_zspage == 1 &&
971 class->pages_per_zspage == 1))
974 prev_zpdesc->next = zpdesc;
981 * Allocate a zspage for the given size class
994 zspage->magic = ZSPAGE_MAGIC;
997 for (i = 0; i < class->pages_per_zspage; i++) {
1002 while (--i >= 0) {
1018 zspage->pool = pool;
1019 zspage->class = class->index;
1029 for (i = ZS_INUSE_RATIO_99; i >= ZS_INUSE_RATIO_0; i--) {
1030 zspage = list_first_entry_or_null(&class->fullness_list[i],
1045 if (area->vm_buf)
1047 area->vm_buf = kmalloc(ZS_MAX_ALLOC_SIZE, GFP_KERNEL);
1048 if (!area->vm_buf)
1049 return -ENOMEM;
1055 kfree(area->vm_buf);
1056 area->vm_buf = NULL;
1060 struct zpdesc *zpdescs[2], int off, int size)
1063 char *buf = area->vm_buf;
1065 /* disable page faults to match kmap_local_page() return conditions */
1069 if (area->vm_mm == ZS_MM_WO)
1072 sizes[0] = PAGE_SIZE - off;
1073 sizes[1] = size - sizes[0];
1075 /* copy object to per-cpu buffer */
1079 return area->vm_buf;
1083 struct zpdesc *zpdescs[2], int off, int size)
1089 if (area->vm_mm == ZS_MM_RO)
1092 buf = area->vm_buf;
1094 size -= ZS_HANDLE_SIZE;
1097 sizes[0] = PAGE_SIZE - off;
1098 sizes[1] = size - sizes[0];
1100 /* copy per-cpu buffer to object */
1105 /* enable page faults to match kunmap_local() return conditions */
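
For an object that crosses a page boundary, __zs_map_object() copies the tail of the first page and the head of the second into one contiguous per-cpu buffer, and __zs_unmap_object() writes the two pieces back the same way. A userspace sketch of that split follows; the kmap/page-fault handling and the ZS_MM_WO/ZS_MM_RO short cuts are deliberately left out, and all names here are invented.

    /* Sketch: gather an object straddling a page boundary into one buffer. */
    #include <stdio.h>
    #include <string.h>

    #define PAGE_SIZE 4096

    /* copy `size` bytes starting at `off` in pages[0], spilling into pages[1] */
    static void map_split_object(char *buf, char *pages[2], int off, int size)
    {
        int sizes[2];

        sizes[0] = PAGE_SIZE - off;   /* tail of the first page */
        sizes[1] = size - sizes[0];   /* head of the second page */

        memcpy(buf, pages[0] + off, sizes[0]);
        memcpy(buf + sizes[0], pages[1], sizes[1]);
    }

    int main(void)
    {
        static char p0[PAGE_SIZE], p1[PAGE_SIZE], buf[64];
        char *pages[2] = { p0, p1 };

        /* a 48-byte object whose last 16 bytes live on the second page */
        memset(p0 + PAGE_SIZE - 32, 'A', 32);
        memset(p1, 'B', 16);
        map_split_object(buf, pages, PAGE_SIZE - 32, 48);
        printf("%c ... %c\n", buf[0], buf[47]);   /* A ... B */
        return 0;
    }
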
1129 if (prev->pages_per_zspage == pages_per_zspage &&
1130 prev->objs_per_zspage == objs_per_zspage)
1138 return get_zspage_inuse(zspage) == class->objs_per_zspage;
1147 * zs_lookup_class_index() - Returns index of the zsmalloc &size_class
1148 * that holds objects of the provided size.
1150 * @size: object size
1155 * provided size.
1157 unsigned int zs_lookup_class_index(struct zs_pool *pool, unsigned int size)
1161 class = pool->size_class[get_size_class_index(size)];
1163 return class->index;
1169 return atomic_long_read(&pool->pages_allocated);
1174 * zs_map_object - get address of allocated object from handle.
1186 * This function returns with preemption and page faults disabled.
1202 * Because we use per-cpu mapping areas shared among the
1209 read_lock(&pool->migrate_lock);
1215 * migration cannot move any zpages in this zspage. Here, class->lock
1221 read_unlock(&pool->migrate_lock);
1224 off = offset_in_page(class->size * obj_idx);
1228 area->vm_mm = mm;
1229 if (off + class->size <= PAGE_SIZE) {
1230 /* this object is contained entirely within a page */
1231 area->vm_addr = kmap_local_zpdesc(zpdesc);
1232 ret = area->vm_addr + off;
1241 ret = __zs_map_object(area, zpdescs, off, class->size);
1264 off = offset_in_page(class->size * obj_idx);
1267 if (off + class->size <= PAGE_SIZE)
1268 kunmap_local(area->vm_addr);
1276 __zs_unmap_object(area, zpdescs, off, class->size);
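
For context, here is a sketch of a typical caller of the allocation and mapping API shown in this listing (zs_malloc, zs_map_object, zs_unmap_object). It is kernel-style pseudo-driver code, not taken from any real user of zsmalloc; the function name and error handling are illustrative only. Note the constraint documented at line 1186: no sleeping between map and unmap.

    /*
     * Illustrative kernel-style caller; store_blob() is an invented name
     * and the error handling is abbreviated.
     */
    #include <linux/err.h>
    #include <linux/gfp.h>
    #include <linux/string.h>
    #include <linux/zsmalloc.h>

    static int store_blob(struct zs_pool *pool, const void *src, size_t len)
    {
        unsigned long handle;
        void *dst;

        handle = zs_malloc(pool, len, GFP_KERNEL);
        if (IS_ERR_VALUE(handle))
            return PTR_ERR((void *)handle);

        /* no sleeping between map and unmap: page faults are disabled */
        dst = zs_map_object(pool, handle, ZS_MM_WO);
        memcpy(dst, src, len);
        zs_unmap_object(pool, handle);

        /* the caller keeps `handle`; zs_free(pool, handle) releases it */
        return 0;
    }
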
1285 * zs_huge_class_size() - Returns the size (in bytes) of the first huge
1289 * The function returns the size of the first huge class - any object of equal
1290 * or bigger size will be stored in zspage consisting of a single physical
1291 * page.
1295 * Return: the size (in bytes) of the first huge zsmalloc &size_class.
1315 class = pool->size_class[zspage->class];
1318 offset = obj * class->size;
1328 set_freeobj(zspage, link->next >> OBJ_TAG_BITS);
1331 link->handle = handle | OBJ_ALLOCATED_TAG;
1333 zspage->first_zpdesc->handle = handle | OBJ_ALLOCATED_TAG;
1346 * zs_malloc - Allocate block of given size from pool.
1348 * @size: size of block to allocate
1353 * Allocation requests with size > ZS_MAX_ALLOC_SIZE will fail.
1355 unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
1362 if (unlikely(!size))
1363 return (unsigned long)ERR_PTR(-EINVAL);
1365 if (unlikely(size > ZS_MAX_ALLOC_SIZE))
1366 return (unsigned long)ERR_PTR(-ENOSPC);
1370 return (unsigned long)ERR_PTR(-ENOMEM);
1373 size += ZS_HANDLE_SIZE;
1374 class = pool->size_class[get_size_class_index(size)];
1376 /* class->lock effectively protects the zpage migration */
1377 spin_lock(&class->lock);
1388 spin_unlock(&class->lock);
1393 return (unsigned long)ERR_PTR(-ENOMEM);
1396 spin_lock(&class->lock);
1400 atomic_long_add(class->pages_per_zspage, &pool->pages_allocated);
1401 class_stat_add(class, ZS_OBJS_ALLOCATED, class->objs_per_zspage);
1407 spin_unlock(&class->lock);
1432 link->next = get_freeobj(zspage) << OBJ_TAG_BITS;
1434 f_zpdesc->handle = 0;
1438 mod_zspage_inuse(zspage, -1);
1453 * The pool->migrate_lock protects the race with zpage's migration
1454 * so it's safe to get the page from handle.
1456 read_lock(&pool->migrate_lock);
1461 spin_lock(&class->lock);
1462 read_unlock(&pool->migrate_lock);
1465 obj_free(class->size, obj);
1471 spin_unlock(&class->lock);
1483 int s_size, d_size, size;
1486 s_size = d_size = class->size;
1491 s_off = offset_in_page(class->size * s_objidx);
1492 d_off = offset_in_page(class->size * d_objidx);
1494 if (s_off + class->size > PAGE_SIZE)
1495 s_size = PAGE_SIZE - s_off;
1497 if (d_off + class->size > PAGE_SIZE)
1498 d_size = PAGE_SIZE - d_off;
1504 size = min(s_size, d_size);
1505 memcpy(d_addr + d_off, s_addr + s_off, size);
1506 written += size;
1508 if (written == class->size)
1511 s_off += size;
1512 s_size -= size;
1513 d_off += size;
1514 d_size -= size;
1529 s_size = class->size - written;
1537 d_size = class->size - written;
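
zs_object_copy() moves class->size bytes between two objects, either of which may straddle a page boundary; it copies min(source run, destination run) bytes at a time and continues on the next page whenever one side hits a boundary. The userspace model below captures only that chunking arithmetic; the kmap_local re-mapping done by the real code is elided and all names are invented.

    /* Model of the chunked copy between two possibly-straddling objects. */
    #include <stdio.h>
    #include <string.h>

    #define PAGE_SIZE 4096

    struct loc {
        char *pages[2];   /* the (at most) two pages backing the object */
        int   off;        /* offset of the object within pages[0] */
    };

    static void object_copy(struct loc *d, struct loc *s, int class_size)
    {
        int written = 0, s_page = 0, d_page = 0;
        int s_off = s->off, d_off = d->off;

        while (written < class_size) {
            /* bytes left before either side hits a page boundary */
            int s_size = PAGE_SIZE - s_off;
            int d_size = PAGE_SIZE - d_off;
            int size = class_size - written;

            if (size > s_size)
                size = s_size;
            if (size > d_size)
                size = d_size;

            memcpy(d->pages[d_page] + d_off,
                   s->pages[s_page] + s_off, size);
            written += size;
            s_off += size;
            d_off += size;

            /* crossing a boundary: continue on the next page */
            if (s_off == PAGE_SIZE) {
                s_page++;
                s_off = 0;
            }
            if (d_off == PAGE_SIZE) {
                d_page++;
                d_off = 0;
            }
        }
    }

    int main(void)
    {
        static char sp[2][PAGE_SIZE], dp[2][PAGE_SIZE];
        struct loc s = { { sp[0], sp[1] }, PAGE_SIZE - 40 };
        struct loc d = { { dp[0], dp[1] }, PAGE_SIZE - 8 };

        memset(sp[0] + s.off, 'x', 40);   /* 112-byte object: 40 + 72 bytes */
        memset(sp[1], 'y', 72);           /* split across the two pages */
        object_copy(&d, &s, 112);
        printf("%c %c %c\n", dp[0][PAGE_SIZE - 1], dp[1][0], dp[1][103]);
        return 0;
    }
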
1559 offset += class->size * index;
1565 offset += class->size;
1583 struct size_class *class = pool->size_class[src_zspage->class];
1599 obj_free(class->size, used_obj);
1617 zspage = list_first_entry_or_null(&class->fullness_list[fg],
1633 for (fg = ZS_INUSE_RATIO_99; fg >= ZS_INUSE_RATIO_10; fg--) {
1634 zspage = list_first_entry_or_null(&class->fullness_list[fg],
1646 * putback_zspage - add @zspage into right class's fullness list
1648 * @zspage: target page
1674 * lock each page under migrate_read_lock(). Otherwise, the page we lock
1676 * the wrong page to unlock, so we must take a reference to the page
1708 rwlock_init(&zspage->lock);
1711 static void migrate_read_lock(struct zspage *zspage) __acquires(&zspage->lock)
1713 read_lock(&zspage->lock);
1716 static void migrate_read_unlock(struct zspage *zspage) __releases(&zspage->lock)
1718 read_unlock(&zspage->lock);
1723 write_lock(&zspage->lock);
1728 write_unlock(&zspage->lock);
1756 newzpdesc->handle = oldzpdesc->handle;
1760 static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
1763 * Page is locked so zspage couldn't be destroyed. For detail, look at
1766 VM_BUG_ON_PAGE(PageIsolated(page), page);
1771 static int zs_page_migrate(struct page *newpage, struct page *page,
1779 struct zpdesc *zpdesc = page_zpdesc(page);
1788 /* We're committed, tell the world that this is a Zsmalloc page. */
1791 /* The page is locked, so this pointer must remain valid */
1793 pool = zspage->pool;
1799 write_lock(&pool->migrate_lock);
1805 spin_lock(&class->lock);
1820 addr += class->size) {
1836 write_unlock(&pool->migrate_lock);
1837 spin_unlock(&class->lock);
1852 static void zs_page_putback(struct page *page)
1854 VM_BUG_ON_PAGE(!PageIsolated(page), page);
1877 class = pool->size_class[i];
1878 if (class->index != i)
1881 spin_lock(&class->lock);
1882 list_splice_init(&class->fullness_list[ZS_INUSE_RATIO_0],
1884 spin_unlock(&class->lock);
1888 list_del(&zspage->list);
1892 spin_lock(&class->lock);
1895 spin_unlock(&class->lock);
1901 schedule_work(&pool->free_work);
1906 flush_work(&pool->free_work);
1911 INIT_WORK(&pool->free_work, async_free_zspage);
1942 obj_wasted = obj_allocated - obj_used;
1943 obj_wasted /= class->objs_per_zspage;
1945 return obj_wasted * class->pages_per_zspage;
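
zs_can_compact() estimates how many pages compaction could release: only whole zspages' worth of allocated-but-unused object slots count, since pages are freed one zspage at a time. A worked example with made-up numbers:

    /* Worked example of the zs_can_compact() estimate; numbers are made up. */
    #include <stdio.h>

    static unsigned long pages_freeable(unsigned long obj_allocated,
                                        unsigned long obj_used,
                                        int objs_per_zspage,
                                        int pages_per_zspage)
    {
        /* whole zspages' worth of free slots that compaction could reclaim */
        unsigned long obj_wasted = (obj_allocated - obj_used) / objs_per_zspage;

        return obj_wasted * pages_per_zspage;
    }

    int main(void)
    {
        /* 10 zspages of 36 slots each (3 pages per zspage), 280 slots in use */
        printf("%lu pages freeable\n", pages_freeable(360, 280, 36, 3));
        return 0;
    }
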
1959 write_lock(&pool->migrate_lock);
1960 spin_lock(&class->lock);
1981 pages_freed += class->pages_per_zspage;
1986 || rwlock_is_contended(&pool->migrate_lock)) {
1990 spin_unlock(&class->lock);
1991 write_unlock(&pool->migrate_lock);
1993 write_lock(&pool->migrate_lock);
1994 spin_lock(&class->lock);
2004 spin_unlock(&class->lock);
2005 write_unlock(&pool->migrate_lock);
2017 * Pool compaction is performed under pool->migrate_lock so it is basically
2018 * single-threaded. Having more than one thread in __zs_compact()
2019 * will increase pool->migrate_lock contention, which will impact other
2020 * zsmalloc operations that need pool->migrate_lock.
2022 if (atomic_xchg(&pool->compaction_in_progress, 1))
2025 for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) {
2026 class = pool->size_class[i];
2027 if (class->index != i)
2031 atomic_long_add(pages_freed, &pool->stats.pages_compacted);
2032 atomic_set(&pool->compaction_in_progress, 0);
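
The atomic_xchg() at line 2022 acts as a try-lock style guard: only the caller that flips the flag from 0 to 1 runs the compaction loop, and everyone else returns immediately, avoiding extra migrate_lock contention. A minimal userspace model of that pattern using C11 atomics (names invented):

    /* Userspace model of the single-compactor guard, using C11 atomics. */
    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int compaction_in_progress;

    static unsigned long zs_compact_model(void)
    {
        /* only the caller that flips 0 -> 1 does the work */
        if (atomic_exchange(&compaction_in_progress, 1))
            return 0;

        /* ... walk the size classes from largest to smallest here ... */

        atomic_store(&compaction_in_progress, 0);
        return 0;
    }

    int main(void)
    {
        printf("%lu pages freed\n", zs_compact_model());
        return 0;
    }
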
2040 memcpy(stats, &pool->stats, sizeof(struct zs_pool_stats));
2048 struct zs_pool *pool = shrinker->private_data;
2066 struct zs_pool *pool = shrinker->private_data;
2068 for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) {
2069 class = pool->size_class[i];
2070 if (class->index != i)
2081 shrinker_free(pool->shrinker);
2086 pool->shrinker = shrinker_alloc(0, "mm-zspool:%s", pool->name);
2087 if (!pool->shrinker)
2088 return -ENOMEM;
2090 pool->shrinker->scan_objects = zs_shrinker_scan;
2091 pool->shrinker->count_objects = zs_shrinker_count;
2092 pool->shrinker->batch = 0;
2093 pool->shrinker->private_data = pool;
2095 shrinker_register(pool->shrinker);
2122 * zs_create_pool - Creates an allocation pool to work from.
2142 rwlock_init(&pool->migrate_lock);
2143 atomic_set(&pool->compaction_in_progress, 0);
2145 pool->name = kstrdup(name, GFP_KERNEL);
2146 if (!pool->name)
2153 * Iterate in reverse, because the size of the size_class that we want to use
2154 * for merging should be larger than or equal to the current size.
2156 for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) {
2157 int size;
2163 size = ZS_MIN_ALLOC_SIZE + i * ZS_SIZE_CLASS_DELTA;
2164 if (size > ZS_MAX_ALLOC_SIZE)
2165 size = ZS_MAX_ALLOC_SIZE;
2166 pages_per_zspage = calculate_zspage_chain_size(size);
2167 objs_per_zspage = pages_per_zspage * PAGE_SIZE / size;
2171 * so huge_class_size holds the size of the first huge
2177 huge_class_size = size;
2181 * unconditionally adds handle size before it performs
2182 * size class search - so object may be smaller than
2183 * huge class size, yet it still can end up in the huge
2187 huge_class_size -= (ZS_HANDLE_SIZE - 1);
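
huge_class_size records the smallest user-visible size that lands in a "huge" class. Because zs_malloc() pads every request by ZS_HANDLE_SIZE before the class lookup (line 1373), the threshold reported to users is lowered by ZS_HANDLE_SIZE - 1. A worked example; both constants below are illustrative assumptions, since the real first-huge-class size depends on the zspage chain-size calculation at line 2166.

    /* Worked example of the huge_class_size adjustment; values are assumed. */
    #include <stdio.h>

    #define ZS_HANDLE_SIZE 8   /* assumed: sizeof(unsigned long) on 64-bit */

    int main(void)
    {
        int first_huge_class = 3264;   /* assumed size of the first huge class */

        /*
         * zs_malloc() adds ZS_HANDLE_SIZE to the request before picking a
         * class, so any request of at least 3264 - 7 = 3257 bytes already
         * ends up in (or above) the huge class.
         */
        int huge_class_size = first_huge_class - (ZS_HANDLE_SIZE - 1);

        printf("requests >= %d bytes get a whole page per object\n",
               huge_class_size);
        return 0;
    }
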
2192 * as alloc/free for that size. Although it is natural that we
2193 * have one size_class for each size, there is a chance that we
2201 pool->size_class[i] = prev_class;
2210 class->size = size;
2211 class->index = i;
2212 class->pages_per_zspage = pages_per_zspage;
2213 class->objs_per_zspage = objs_per_zspage;
2214 spin_lock_init(&class->lock);
2215 pool->size_class[i] = class;
2219 INIT_LIST_HEAD(&class->fullness_list[fullness]);
2255 struct size_class *class = pool->size_class[i];
2260 if (class->index != i)
2264 if (list_empty(&class->fullness_list[fg]))
2267 pr_err("Class-%d fullness group %d is not empty\n",
2268 class->size, fg);
2274 kfree(pool->name);