1 // SPDX-License-Identifier: GPL-2.0 2 #define _GNU_SOURCE 3 4 #include <linux/limits.h> 5 #include <unistd.h> 6 #include <stdio.h> 7 #include <signal.h> 8 #include <sys/sysinfo.h> 9 #include <string.h> 10 #include <sys/wait.h> 11 #include <sys/mman.h> 12 13 #include "../kselftest.h" 14 #include "cgroup_util.h" 15 16 static int read_int(const char *path, size_t *value) 17 { 18 FILE *file; 19 int ret = 0; 20 21 file = fopen(path, "r"); 22 if (!file) 23 return -1; 24 if (fscanf(file, "%ld", value) != 1) 25 ret = -1; 26 fclose(file); 27 return ret; 28 } 29 30 static int set_min_free_kb(size_t value) 31 { 32 FILE *file; 33 int ret; 34 35 file = fopen("/proc/sys/vm/min_free_kbytes", "w"); 36 if (!file) 37 return -1; 38 ret = fprintf(file, "%ld\n", value); 39 fclose(file); 40 return ret; 41 } 42 43 static int read_min_free_kb(size_t *value) 44 { 45 return read_int("/proc/sys/vm/min_free_kbytes", value); 46 } 47 48 static int get_zswap_stored_pages(size_t *value) 49 { 50 return read_int("/sys/kernel/debug/zswap/stored_pages", value); 51 } 52 53 static int get_cg_wb_count(const char *cg) 54 { 55 return cg_read_key_long(cg, "memory.stat", "zswpwb"); 56 } 57 58 static long get_zswpout(const char *cgroup) 59 { 60 return cg_read_key_long(cgroup, "memory.stat", "zswpout "); 61 } 62 63 static int allocate_and_read_bytes(const char *cgroup, void *arg) 64 { 65 size_t size = (size_t)arg; 66 char *mem = (char *)malloc(size); 67 int ret = 0; 68 69 if (!mem) 70 return -1; 71 for (int i = 0; i < size; i += 4095) 72 mem[i] = 'a'; 73 74 /* Go through the allocated memory to (z)swap in and out pages */ 75 for (int i = 0; i < size; i += 4095) { 76 if (mem[i] != 'a') 77 ret = -1; 78 } 79 80 free(mem); 81 return ret; 82 } 83 84 static int allocate_bytes(const char *cgroup, void *arg) 85 { 86 size_t size = (size_t)arg; 87 char *mem = (char *)malloc(size); 88 89 if (!mem) 90 return -1; 91 for (int i = 0; i < size; i += 4095) 92 mem[i] = 'a'; 93 free(mem); 94 return 0; 95 } 96 97 static char *setup_test_group_1M(const char *root, const char *name) 98 { 99 char *group_name = cg_name(root, name); 100 101 if (!group_name) 102 return NULL; 103 if (cg_create(group_name)) 104 goto fail; 105 if (cg_write(group_name, "memory.max", "1M")) { 106 cg_destroy(group_name); 107 goto fail; 108 } 109 return group_name; 110 fail: 111 free(group_name); 112 return NULL; 113 } 114 115 /* 116 * Sanity test to check that pages are written into zswap. 117 */ 118 static int test_zswap_usage(const char *root) 119 { 120 long zswpout_before, zswpout_after; 121 int ret = KSFT_FAIL; 122 char *test_group; 123 124 test_group = cg_name(root, "no_shrink_test"); 125 if (!test_group) 126 goto out; 127 if (cg_create(test_group)) 128 goto out; 129 if (cg_write(test_group, "memory.max", "1M")) 130 goto out; 131 132 zswpout_before = get_zswpout(test_group); 133 if (zswpout_before < 0) { 134 ksft_print_msg("Failed to get zswpout\n"); 135 goto out; 136 } 137 138 /* Allocate more than memory.max to push memory into zswap */ 139 if (cg_run(test_group, allocate_bytes, (void *)MB(4))) 140 goto out; 141 142 /* Verify that pages come into zswap */ 143 zswpout_after = get_zswpout(test_group); 144 if (zswpout_after <= zswpout_before) { 145 ksft_print_msg("zswpout does not increase after test program\n"); 146 goto out; 147 } 148 ret = KSFT_PASS; 149 150 out: 151 cg_destroy(test_group); 152 free(test_group); 153 return ret; 154 } 155 156 /* 157 * Check that when memory.zswap.max = 0, no pages can go to the zswap pool for 158 * the cgroup. 159 */ 160 static int test_swapin_nozswap(const char *root) 161 { 162 int ret = KSFT_FAIL; 163 char *test_group; 164 long swap_peak, zswpout; 165 166 test_group = cg_name(root, "no_zswap_test"); 167 if (!test_group) 168 goto out; 169 if (cg_create(test_group)) 170 goto out; 171 if (cg_write(test_group, "memory.max", "8M")) 172 goto out; 173 if (cg_write(test_group, "memory.zswap.max", "0")) 174 goto out; 175 176 /* Allocate and read more than memory.max to trigger swapin */ 177 if (cg_run(test_group, allocate_and_read_bytes, (void *)MB(32))) 178 goto out; 179 180 /* Verify that pages are swapped out, but no zswap happened */ 181 swap_peak = cg_read_long(test_group, "memory.swap.peak"); 182 if (swap_peak < 0) { 183 ksft_print_msg("failed to get cgroup's swap_peak\n"); 184 goto out; 185 } 186 187 if (swap_peak < MB(24)) { 188 ksft_print_msg("at least 24MB of memory should be swapped out\n"); 189 goto out; 190 } 191 192 zswpout = get_zswpout(test_group); 193 if (zswpout < 0) { 194 ksft_print_msg("failed to get zswpout\n"); 195 goto out; 196 } 197 198 if (zswpout > 0) { 199 ksft_print_msg("zswapout > 0 when memory.zswap.max = 0\n"); 200 goto out; 201 } 202 203 ret = KSFT_PASS; 204 205 out: 206 cg_destroy(test_group); 207 free(test_group); 208 return ret; 209 } 210 211 /* Simple test to verify the (z)swapin code paths */ 212 static int test_zswapin(const char *root) 213 { 214 int ret = KSFT_FAIL; 215 char *test_group; 216 long zswpin; 217 218 test_group = cg_name(root, "zswapin_test"); 219 if (!test_group) 220 goto out; 221 if (cg_create(test_group)) 222 goto out; 223 if (cg_write(test_group, "memory.max", "8M")) 224 goto out; 225 if (cg_write(test_group, "memory.zswap.max", "max")) 226 goto out; 227 228 /* Allocate and read more than memory.max to trigger (z)swap in */ 229 if (cg_run(test_group, allocate_and_read_bytes, (void *)MB(32))) 230 goto out; 231 232 zswpin = cg_read_key_long(test_group, "memory.stat", "zswpin "); 233 if (zswpin < 0) { 234 ksft_print_msg("failed to get zswpin\n"); 235 goto out; 236 } 237 238 if (zswpin < MB(24) / PAGE_SIZE) { 239 ksft_print_msg("at least 24MB should be brought back from zswap\n"); 240 goto out; 241 } 242 243 ret = KSFT_PASS; 244 245 out: 246 cg_destroy(test_group); 247 free(test_group); 248 return ret; 249 } 250 251 /* 252 * When trying to store a memcg page in zswap, if the memcg hits its memory 253 * limit in zswap, writeback should affect only the zswapped pages of that 254 * memcg. 255 */ 256 static int test_no_invasive_cgroup_shrink(const char *root) 257 { 258 int ret = KSFT_FAIL; 259 size_t control_allocation_size = MB(10); 260 char *control_allocation, *wb_group = NULL, *control_group = NULL; 261 262 wb_group = setup_test_group_1M(root, "per_memcg_wb_test1"); 263 if (!wb_group) 264 return KSFT_FAIL; 265 if (cg_write(wb_group, "memory.zswap.max", "10K")) 266 goto out; 267 control_group = setup_test_group_1M(root, "per_memcg_wb_test2"); 268 if (!control_group) 269 goto out; 270 271 /* Push some test_group2 memory into zswap */ 272 if (cg_enter_current(control_group)) 273 goto out; 274 control_allocation = malloc(control_allocation_size); 275 for (int i = 0; i < control_allocation_size; i += 4095) 276 control_allocation[i] = 'a'; 277 if (cg_read_key_long(control_group, "memory.stat", "zswapped") < 1) 278 goto out; 279 280 /* Allocate 10x memory.max to push wb_group memory into zswap and trigger wb */ 281 if (cg_run(wb_group, allocate_bytes, (void *)MB(10))) 282 goto out; 283 284 /* Verify that only zswapped memory from gwb_group has been written back */ 285 if (get_cg_wb_count(wb_group) > 0 && get_cg_wb_count(control_group) == 0) 286 ret = KSFT_PASS; 287 out: 288 cg_enter_current(root); 289 if (control_group) { 290 cg_destroy(control_group); 291 free(control_group); 292 } 293 cg_destroy(wb_group); 294 free(wb_group); 295 if (control_allocation) 296 free(control_allocation); 297 return ret; 298 } 299 300 struct no_kmem_bypass_child_args { 301 size_t target_alloc_bytes; 302 size_t child_allocated; 303 }; 304 305 static int no_kmem_bypass_child(const char *cgroup, void *arg) 306 { 307 struct no_kmem_bypass_child_args *values = arg; 308 void *allocation; 309 310 allocation = malloc(values->target_alloc_bytes); 311 if (!allocation) { 312 values->child_allocated = true; 313 return -1; 314 } 315 for (long i = 0; i < values->target_alloc_bytes; i += 4095) 316 ((char *)allocation)[i] = 'a'; 317 values->child_allocated = true; 318 pause(); 319 free(allocation); 320 return 0; 321 } 322 323 /* 324 * When pages owned by a memcg are pushed to zswap by kswapd, they should be 325 * charged to that cgroup. This wasn't the case before commit 326 * cd08d80ecdac("mm: correctly charge compressed memory to its memcg"). 327 * 328 * The test first allocates memory in a memcg, then raises min_free_kbytes to 329 * a very high value so that the allocation falls below low wm, then makes 330 * another allocation to trigger kswapd that should push the memcg-owned pages 331 * to zswap and verifies that the zswap pages are correctly charged. 332 * 333 * To be run on a VM with at most 4G of memory. 334 */ 335 static int test_no_kmem_bypass(const char *root) 336 { 337 size_t min_free_kb_high, min_free_kb_low, min_free_kb_original; 338 struct no_kmem_bypass_child_args *values; 339 size_t trigger_allocation_size; 340 int wait_child_iteration = 0; 341 long stored_pages_threshold; 342 struct sysinfo sys_info; 343 int ret = KSFT_FAIL; 344 int child_status; 345 char *test_group; 346 pid_t child_pid; 347 348 /* Read sys info and compute test values accordingly */ 349 if (sysinfo(&sys_info) != 0) 350 return KSFT_FAIL; 351 if (sys_info.totalram > 5000000000) 352 return KSFT_SKIP; 353 values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ | 354 PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); 355 if (values == MAP_FAILED) 356 return KSFT_FAIL; 357 if (read_min_free_kb(&min_free_kb_original)) 358 return KSFT_FAIL; 359 min_free_kb_high = sys_info.totalram / 2000; 360 min_free_kb_low = sys_info.totalram / 500000; 361 values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) + 362 sys_info.totalram * 5 / 100; 363 stored_pages_threshold = sys_info.totalram / 5 / 4096; 364 trigger_allocation_size = sys_info.totalram / 20; 365 366 /* Set up test memcg */ 367 if (cg_write(root, "cgroup.subtree_control", "+memory")) 368 goto out; 369 test_group = cg_name(root, "kmem_bypass_test"); 370 if (!test_group) 371 goto out; 372 373 /* Spawn memcg child and wait for it to allocate */ 374 set_min_free_kb(min_free_kb_low); 375 if (cg_create(test_group)) 376 goto out; 377 values->child_allocated = false; 378 child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values); 379 if (child_pid < 0) 380 goto out; 381 while (!values->child_allocated && wait_child_iteration++ < 10000) 382 usleep(1000); 383 384 /* Try to wakeup kswapd and let it push child memory to zswap */ 385 set_min_free_kb(min_free_kb_high); 386 for (int i = 0; i < 20; i++) { 387 size_t stored_pages; 388 char *trigger_allocation = malloc(trigger_allocation_size); 389 390 if (!trigger_allocation) 391 break; 392 for (int i = 0; i < trigger_allocation_size; i += 4095) 393 trigger_allocation[i] = 'b'; 394 usleep(100000); 395 free(trigger_allocation); 396 if (get_zswap_stored_pages(&stored_pages)) 397 break; 398 if (stored_pages < 0) 399 break; 400 /* If memory was pushed to zswap, verify it belongs to memcg */ 401 if (stored_pages > stored_pages_threshold) { 402 int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped "); 403 int delta = stored_pages * 4096 - zswapped; 404 int result_ok = delta < stored_pages * 4096 / 4; 405 406 ret = result_ok ? KSFT_PASS : KSFT_FAIL; 407 break; 408 } 409 } 410 411 kill(child_pid, SIGTERM); 412 waitpid(child_pid, &child_status, 0); 413 out: 414 set_min_free_kb(min_free_kb_original); 415 cg_destroy(test_group); 416 free(test_group); 417 return ret; 418 } 419 420 #define T(x) { x, #x } 421 struct zswap_test { 422 int (*fn)(const char *root); 423 const char *name; 424 } tests[] = { 425 T(test_zswap_usage), 426 T(test_swapin_nozswap), 427 T(test_zswapin), 428 T(test_no_kmem_bypass), 429 T(test_no_invasive_cgroup_shrink), 430 }; 431 #undef T 432 433 static bool zswap_configured(void) 434 { 435 return access("/sys/module/zswap", F_OK) == 0; 436 } 437 438 int main(int argc, char **argv) 439 { 440 char root[PATH_MAX]; 441 int i, ret = EXIT_SUCCESS; 442 443 if (cg_find_unified_root(root, sizeof(root))) 444 ksft_exit_skip("cgroup v2 isn't mounted\n"); 445 446 if (!zswap_configured()) 447 ksft_exit_skip("zswap isn't configured\n"); 448 449 /* 450 * Check that memory controller is available: 451 * memory is listed in cgroup.controllers 452 */ 453 if (cg_read_strstr(root, "cgroup.controllers", "memory")) 454 ksft_exit_skip("memory controller isn't available\n"); 455 456 if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) 457 if (cg_write(root, "cgroup.subtree_control", "+memory")) 458 ksft_exit_skip("Failed to set memory controller\n"); 459 460 for (i = 0; i < ARRAY_SIZE(tests); i++) { 461 switch (tests[i].fn(root)) { 462 case KSFT_PASS: 463 ksft_test_result_pass("%s\n", tests[i].name); 464 break; 465 case KSFT_SKIP: 466 ksft_test_result_skip("%s\n", tests[i].name); 467 break; 468 default: 469 ret = EXIT_FAILURE; 470 ksft_test_result_fail("%s\n", tests[i].name); 471 break; 472 } 473 } 474 475 return ret; 476 } 477