1 // SPDX-License-Identifier: GPL-2.0 2 #define _GNU_SOURCE 3 4 #include <linux/limits.h> 5 #include <unistd.h> 6 #include <stdio.h> 7 #include <signal.h> 8 #include <sys/sysinfo.h> 9 #include <string.h> 10 #include <sys/wait.h> 11 #include <sys/mman.h> 12 13 #include "../kselftest.h" 14 #include "cgroup_util.h" 15 16 static int read_int(const char *path, size_t *value) 17 { 18 FILE *file; 19 int ret = 0; 20 21 file = fopen(path, "r"); 22 if (!file) 23 return -1; 24 if (fscanf(file, "%ld", value) != 1) 25 ret = -1; 26 fclose(file); 27 return ret; 28 } 29 30 static int set_min_free_kb(size_t value) 31 { 32 FILE *file; 33 int ret; 34 35 file = fopen("/proc/sys/vm/min_free_kbytes", "w"); 36 if (!file) 37 return -1; 38 ret = fprintf(file, "%ld\n", value); 39 fclose(file); 40 return ret; 41 } 42 43 static int read_min_free_kb(size_t *value) 44 { 45 return read_int("/proc/sys/vm/min_free_kbytes", value); 46 } 47 48 static int get_zswap_stored_pages(size_t *value) 49 { 50 return read_int("/sys/kernel/debug/zswap/stored_pages", value); 51 } 52 53 static int get_zswap_written_back_pages(size_t *value) 54 { 55 return read_int("/sys/kernel/debug/zswap/written_back_pages", value); 56 } 57 58 static long get_zswpout(const char *cgroup) 59 { 60 return cg_read_key_long(cgroup, "memory.stat", "zswpout "); 61 } 62 63 static int allocate_bytes(const char *cgroup, void *arg) 64 { 65 size_t size = (size_t)arg; 66 char *mem = (char *)malloc(size); 67 68 if (!mem) 69 return -1; 70 for (int i = 0; i < size; i += 4095) 71 mem[i] = 'a'; 72 free(mem); 73 return 0; 74 } 75 76 /* 77 * Sanity test to check that pages are written into zswap. 78 */ 79 static int test_zswap_usage(const char *root) 80 { 81 long zswpout_before, zswpout_after; 82 int ret = KSFT_FAIL; 83 char *test_group; 84 85 /* Set up */ 86 test_group = cg_name(root, "no_shrink_test"); 87 if (!test_group) 88 goto out; 89 if (cg_create(test_group)) 90 goto out; 91 if (cg_write(test_group, "memory.max", "1M")) 92 goto out; 93 94 zswpout_before = get_zswpout(test_group); 95 if (zswpout_before < 0) { 96 ksft_print_msg("Failed to get zswpout\n"); 97 goto out; 98 } 99 100 /* Allocate more than memory.max to push memory into zswap */ 101 if (cg_run(test_group, allocate_bytes, (void *)MB(4))) 102 goto out; 103 104 /* Verify that pages come into zswap */ 105 zswpout_after = get_zswpout(test_group); 106 if (zswpout_after <= zswpout_before) { 107 ksft_print_msg("zswpout does not increase after test program\n"); 108 goto out; 109 } 110 ret = KSFT_PASS; 111 112 out: 113 cg_destroy(test_group); 114 free(test_group); 115 return ret; 116 } 117 118 /* 119 * When trying to store a memcg page in zswap, if the memcg hits its memory 120 * limit in zswap, writeback should not be triggered. 121 * 122 * This was fixed with commit 0bdf0efa180a("zswap: do not shrink if cgroup may 123 * not zswap"). Needs to be revised when a per memcg writeback mechanism is 124 * implemented. 125 */ 126 static int test_no_invasive_cgroup_shrink(const char *root) 127 { 128 size_t written_back_before, written_back_after; 129 int ret = KSFT_FAIL; 130 char *test_group; 131 132 /* Set up */ 133 test_group = cg_name(root, "no_shrink_test"); 134 if (!test_group) 135 goto out; 136 if (cg_create(test_group)) 137 goto out; 138 if (cg_write(test_group, "memory.max", "1M")) 139 goto out; 140 if (cg_write(test_group, "memory.zswap.max", "10K")) 141 goto out; 142 if (get_zswap_written_back_pages(&written_back_before)) 143 goto out; 144 145 /* Allocate 10x memory.max to push memory into zswap */ 146 if (cg_run(test_group, allocate_bytes, (void *)MB(10))) 147 goto out; 148 149 /* Verify that no writeback happened because of the memcg allocation */ 150 if (get_zswap_written_back_pages(&written_back_after)) 151 goto out; 152 if (written_back_after == written_back_before) 153 ret = KSFT_PASS; 154 out: 155 cg_destroy(test_group); 156 free(test_group); 157 return ret; 158 } 159 160 struct no_kmem_bypass_child_args { 161 size_t target_alloc_bytes; 162 size_t child_allocated; 163 }; 164 165 static int no_kmem_bypass_child(const char *cgroup, void *arg) 166 { 167 struct no_kmem_bypass_child_args *values = arg; 168 void *allocation; 169 170 allocation = malloc(values->target_alloc_bytes); 171 if (!allocation) { 172 values->child_allocated = true; 173 return -1; 174 } 175 for (long i = 0; i < values->target_alloc_bytes; i += 4095) 176 ((char *)allocation)[i] = 'a'; 177 values->child_allocated = true; 178 pause(); 179 free(allocation); 180 return 0; 181 } 182 183 /* 184 * When pages owned by a memcg are pushed to zswap by kswapd, they should be 185 * charged to that cgroup. This wasn't the case before commit 186 * cd08d80ecdac("mm: correctly charge compressed memory to its memcg"). 187 * 188 * The test first allocates memory in a memcg, then raises min_free_kbytes to 189 * a very high value so that the allocation falls below low wm, then makes 190 * another allocation to trigger kswapd that should push the memcg-owned pages 191 * to zswap and verifies that the zswap pages are correctly charged. 192 * 193 * To be run on a VM with at most 4G of memory. 194 */ 195 static int test_no_kmem_bypass(const char *root) 196 { 197 size_t min_free_kb_high, min_free_kb_low, min_free_kb_original; 198 struct no_kmem_bypass_child_args *values; 199 size_t trigger_allocation_size; 200 int wait_child_iteration = 0; 201 long stored_pages_threshold; 202 struct sysinfo sys_info; 203 int ret = KSFT_FAIL; 204 int child_status; 205 char *test_group; 206 pid_t child_pid; 207 208 /* Read sys info and compute test values accordingly */ 209 if (sysinfo(&sys_info) != 0) 210 return KSFT_FAIL; 211 if (sys_info.totalram > 5000000000) 212 return KSFT_SKIP; 213 values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ | 214 PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); 215 if (values == MAP_FAILED) 216 return KSFT_FAIL; 217 if (read_min_free_kb(&min_free_kb_original)) 218 return KSFT_FAIL; 219 min_free_kb_high = sys_info.totalram / 2000; 220 min_free_kb_low = sys_info.totalram / 500000; 221 values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) + 222 sys_info.totalram * 5 / 100; 223 stored_pages_threshold = sys_info.totalram / 5 / 4096; 224 trigger_allocation_size = sys_info.totalram / 20; 225 226 /* Set up test memcg */ 227 if (cg_write(root, "cgroup.subtree_control", "+memory")) 228 goto out; 229 test_group = cg_name(root, "kmem_bypass_test"); 230 if (!test_group) 231 goto out; 232 233 /* Spawn memcg child and wait for it to allocate */ 234 set_min_free_kb(min_free_kb_low); 235 if (cg_create(test_group)) 236 goto out; 237 values->child_allocated = false; 238 child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values); 239 if (child_pid < 0) 240 goto out; 241 while (!values->child_allocated && wait_child_iteration++ < 10000) 242 usleep(1000); 243 244 /* Try to wakeup kswapd and let it push child memory to zswap */ 245 set_min_free_kb(min_free_kb_high); 246 for (int i = 0; i < 20; i++) { 247 size_t stored_pages; 248 char *trigger_allocation = malloc(trigger_allocation_size); 249 250 if (!trigger_allocation) 251 break; 252 for (int i = 0; i < trigger_allocation_size; i += 4095) 253 trigger_allocation[i] = 'b'; 254 usleep(100000); 255 free(trigger_allocation); 256 if (get_zswap_stored_pages(&stored_pages)) 257 break; 258 if (stored_pages < 0) 259 break; 260 /* If memory was pushed to zswap, verify it belongs to memcg */ 261 if (stored_pages > stored_pages_threshold) { 262 int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped "); 263 int delta = stored_pages * 4096 - zswapped; 264 int result_ok = delta < stored_pages * 4096 / 4; 265 266 ret = result_ok ? KSFT_PASS : KSFT_FAIL; 267 break; 268 } 269 } 270 271 kill(child_pid, SIGTERM); 272 waitpid(child_pid, &child_status, 0); 273 out: 274 set_min_free_kb(min_free_kb_original); 275 cg_destroy(test_group); 276 free(test_group); 277 return ret; 278 } 279 280 #define T(x) { x, #x } 281 struct zswap_test { 282 int (*fn)(const char *root); 283 const char *name; 284 } tests[] = { 285 T(test_zswap_usage), 286 T(test_no_kmem_bypass), 287 T(test_no_invasive_cgroup_shrink), 288 }; 289 #undef T 290 291 static bool zswap_configured(void) 292 { 293 return access("/sys/module/zswap", F_OK) == 0; 294 } 295 296 int main(int argc, char **argv) 297 { 298 char root[PATH_MAX]; 299 int i, ret = EXIT_SUCCESS; 300 301 if (cg_find_unified_root(root, sizeof(root))) 302 ksft_exit_skip("cgroup v2 isn't mounted\n"); 303 304 if (!zswap_configured()) 305 ksft_exit_skip("zswap isn't configured\n"); 306 307 /* 308 * Check that memory controller is available: 309 * memory is listed in cgroup.controllers 310 */ 311 if (cg_read_strstr(root, "cgroup.controllers", "memory")) 312 ksft_exit_skip("memory controller isn't available\n"); 313 314 if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) 315 if (cg_write(root, "cgroup.subtree_control", "+memory")) 316 ksft_exit_skip("Failed to set memory controller\n"); 317 318 for (i = 0; i < ARRAY_SIZE(tests); i++) { 319 switch (tests[i].fn(root)) { 320 case KSFT_PASS: 321 ksft_test_result_pass("%s\n", tests[i].name); 322 break; 323 case KSFT_SKIP: 324 ksft_test_result_skip("%s\n", tests[i].name); 325 break; 326 default: 327 ret = EXIT_FAILURE; 328 ksft_test_result_fail("%s\n", tests[i].name); 329 break; 330 } 331 } 332 333 return ret; 334 } 335