1 // SPDX-License-Identifier: GPL-2.0 2 #define _GNU_SOURCE 3 4 #include <linux/limits.h> 5 #include <unistd.h> 6 #include <stdio.h> 7 #include <signal.h> 8 #include <sys/sysinfo.h> 9 #include <string.h> 10 #include <sys/wait.h> 11 #include <sys/mman.h> 12 13 #include "../kselftest.h" 14 #include "cgroup_util.h" 15 16 static int read_int(const char *path, size_t *value) 17 { 18 FILE *file; 19 int ret = 0; 20 21 file = fopen(path, "r"); 22 if (!file) 23 return -1; 24 if (fscanf(file, "%ld", value) != 1) 25 ret = -1; 26 fclose(file); 27 return ret; 28 } 29 30 static int set_min_free_kb(size_t value) 31 { 32 FILE *file; 33 int ret; 34 35 file = fopen("/proc/sys/vm/min_free_kbytes", "w"); 36 if (!file) 37 return -1; 38 ret = fprintf(file, "%ld\n", value); 39 fclose(file); 40 return ret; 41 } 42 43 static int read_min_free_kb(size_t *value) 44 { 45 return read_int("/proc/sys/vm/min_free_kbytes", value); 46 } 47 48 static int get_zswap_stored_pages(size_t *value) 49 { 50 return read_int("/sys/kernel/debug/zswap/stored_pages", value); 51 } 52 53 static int get_cg_wb_count(const char *cg) 54 { 55 return cg_read_key_long(cg, "memory.stat", "zswp_wb"); 56 } 57 58 static long get_zswpout(const char *cgroup) 59 { 60 return cg_read_key_long(cgroup, "memory.stat", "zswpout "); 61 } 62 63 static int allocate_bytes(const char *cgroup, void *arg) 64 { 65 size_t size = (size_t)arg; 66 char *mem = (char *)malloc(size); 67 68 if (!mem) 69 return -1; 70 for (int i = 0; i < size; i += 4095) 71 mem[i] = 'a'; 72 free(mem); 73 return 0; 74 } 75 76 static char *setup_test_group_1M(const char *root, const char *name) 77 { 78 char *group_name = cg_name(root, name); 79 80 if (!group_name) 81 return NULL; 82 if (cg_create(group_name)) 83 goto fail; 84 if (cg_write(group_name, "memory.max", "1M")) { 85 cg_destroy(group_name); 86 goto fail; 87 } 88 return group_name; 89 fail: 90 free(group_name); 91 return NULL; 92 } 93 94 /* 95 * Sanity test to check that pages are written into zswap. 96 */ 97 static int test_zswap_usage(const char *root) 98 { 99 long zswpout_before, zswpout_after; 100 int ret = KSFT_FAIL; 101 char *test_group; 102 103 /* Set up */ 104 test_group = cg_name(root, "no_shrink_test"); 105 if (!test_group) 106 goto out; 107 if (cg_create(test_group)) 108 goto out; 109 if (cg_write(test_group, "memory.max", "1M")) 110 goto out; 111 112 zswpout_before = get_zswpout(test_group); 113 if (zswpout_before < 0) { 114 ksft_print_msg("Failed to get zswpout\n"); 115 goto out; 116 } 117 118 /* Allocate more than memory.max to push memory into zswap */ 119 if (cg_run(test_group, allocate_bytes, (void *)MB(4))) 120 goto out; 121 122 /* Verify that pages come into zswap */ 123 zswpout_after = get_zswpout(test_group); 124 if (zswpout_after <= zswpout_before) { 125 ksft_print_msg("zswpout does not increase after test program\n"); 126 goto out; 127 } 128 ret = KSFT_PASS; 129 130 out: 131 cg_destroy(test_group); 132 free(test_group); 133 return ret; 134 } 135 136 /* 137 * When trying to store a memcg page in zswap, if the memcg hits its memory 138 * limit in zswap, writeback should affect only the zswapped pages of that 139 * memcg. 140 */ 141 static int test_no_invasive_cgroup_shrink(const char *root) 142 { 143 int ret = KSFT_FAIL; 144 size_t control_allocation_size = MB(10); 145 char *control_allocation, *wb_group = NULL, *control_group = NULL; 146 147 /* Set up */ 148 wb_group = setup_test_group_1M(root, "per_memcg_wb_test1"); 149 if (!wb_group) 150 return KSFT_FAIL; 151 if (cg_write(wb_group, "memory.zswap.max", "10K")) 152 goto out; 153 control_group = setup_test_group_1M(root, "per_memcg_wb_test2"); 154 if (!control_group) 155 goto out; 156 157 /* Push some test_group2 memory into zswap */ 158 if (cg_enter_current(control_group)) 159 goto out; 160 control_allocation = malloc(control_allocation_size); 161 for (int i = 0; i < control_allocation_size; i += 4095) 162 control_allocation[i] = 'a'; 163 if (cg_read_key_long(control_group, "memory.stat", "zswapped") < 1) 164 goto out; 165 166 /* Allocate 10x memory.max to push wb_group memory into zswap and trigger wb */ 167 if (cg_run(wb_group, allocate_bytes, (void *)MB(10))) 168 goto out; 169 170 /* Verify that only zswapped memory from gwb_group has been written back */ 171 if (get_cg_wb_count(wb_group) > 0 && get_cg_wb_count(control_group) == 0) 172 ret = KSFT_PASS; 173 out: 174 cg_enter_current(root); 175 if (control_group) { 176 cg_destroy(control_group); 177 free(control_group); 178 } 179 cg_destroy(wb_group); 180 free(wb_group); 181 if (control_allocation) 182 free(control_allocation); 183 return ret; 184 } 185 186 struct no_kmem_bypass_child_args { 187 size_t target_alloc_bytes; 188 size_t child_allocated; 189 }; 190 191 static int no_kmem_bypass_child(const char *cgroup, void *arg) 192 { 193 struct no_kmem_bypass_child_args *values = arg; 194 void *allocation; 195 196 allocation = malloc(values->target_alloc_bytes); 197 if (!allocation) { 198 values->child_allocated = true; 199 return -1; 200 } 201 for (long i = 0; i < values->target_alloc_bytes; i += 4095) 202 ((char *)allocation)[i] = 'a'; 203 values->child_allocated = true; 204 pause(); 205 free(allocation); 206 return 0; 207 } 208 209 /* 210 * When pages owned by a memcg are pushed to zswap by kswapd, they should be 211 * charged to that cgroup. This wasn't the case before commit 212 * cd08d80ecdac("mm: correctly charge compressed memory to its memcg"). 213 * 214 * The test first allocates memory in a memcg, then raises min_free_kbytes to 215 * a very high value so that the allocation falls below low wm, then makes 216 * another allocation to trigger kswapd that should push the memcg-owned pages 217 * to zswap and verifies that the zswap pages are correctly charged. 218 * 219 * To be run on a VM with at most 4G of memory. 220 */ 221 static int test_no_kmem_bypass(const char *root) 222 { 223 size_t min_free_kb_high, min_free_kb_low, min_free_kb_original; 224 struct no_kmem_bypass_child_args *values; 225 size_t trigger_allocation_size; 226 int wait_child_iteration = 0; 227 long stored_pages_threshold; 228 struct sysinfo sys_info; 229 int ret = KSFT_FAIL; 230 int child_status; 231 char *test_group; 232 pid_t child_pid; 233 234 /* Read sys info and compute test values accordingly */ 235 if (sysinfo(&sys_info) != 0) 236 return KSFT_FAIL; 237 if (sys_info.totalram > 5000000000) 238 return KSFT_SKIP; 239 values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ | 240 PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); 241 if (values == MAP_FAILED) 242 return KSFT_FAIL; 243 if (read_min_free_kb(&min_free_kb_original)) 244 return KSFT_FAIL; 245 min_free_kb_high = sys_info.totalram / 2000; 246 min_free_kb_low = sys_info.totalram / 500000; 247 values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) + 248 sys_info.totalram * 5 / 100; 249 stored_pages_threshold = sys_info.totalram / 5 / 4096; 250 trigger_allocation_size = sys_info.totalram / 20; 251 252 /* Set up test memcg */ 253 if (cg_write(root, "cgroup.subtree_control", "+memory")) 254 goto out; 255 test_group = cg_name(root, "kmem_bypass_test"); 256 if (!test_group) 257 goto out; 258 259 /* Spawn memcg child and wait for it to allocate */ 260 set_min_free_kb(min_free_kb_low); 261 if (cg_create(test_group)) 262 goto out; 263 values->child_allocated = false; 264 child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values); 265 if (child_pid < 0) 266 goto out; 267 while (!values->child_allocated && wait_child_iteration++ < 10000) 268 usleep(1000); 269 270 /* Try to wakeup kswapd and let it push child memory to zswap */ 271 set_min_free_kb(min_free_kb_high); 272 for (int i = 0; i < 20; i++) { 273 size_t stored_pages; 274 char *trigger_allocation = malloc(trigger_allocation_size); 275 276 if (!trigger_allocation) 277 break; 278 for (int i = 0; i < trigger_allocation_size; i += 4095) 279 trigger_allocation[i] = 'b'; 280 usleep(100000); 281 free(trigger_allocation); 282 if (get_zswap_stored_pages(&stored_pages)) 283 break; 284 if (stored_pages < 0) 285 break; 286 /* If memory was pushed to zswap, verify it belongs to memcg */ 287 if (stored_pages > stored_pages_threshold) { 288 int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped "); 289 int delta = stored_pages * 4096 - zswapped; 290 int result_ok = delta < stored_pages * 4096 / 4; 291 292 ret = result_ok ? KSFT_PASS : KSFT_FAIL; 293 break; 294 } 295 } 296 297 kill(child_pid, SIGTERM); 298 waitpid(child_pid, &child_status, 0); 299 out: 300 set_min_free_kb(min_free_kb_original); 301 cg_destroy(test_group); 302 free(test_group); 303 return ret; 304 } 305 306 #define T(x) { x, #x } 307 struct zswap_test { 308 int (*fn)(const char *root); 309 const char *name; 310 } tests[] = { 311 T(test_zswap_usage), 312 T(test_no_kmem_bypass), 313 T(test_no_invasive_cgroup_shrink), 314 }; 315 #undef T 316 317 static bool zswap_configured(void) 318 { 319 return access("/sys/module/zswap", F_OK) == 0; 320 } 321 322 int main(int argc, char **argv) 323 { 324 char root[PATH_MAX]; 325 int i, ret = EXIT_SUCCESS; 326 327 if (cg_find_unified_root(root, sizeof(root))) 328 ksft_exit_skip("cgroup v2 isn't mounted\n"); 329 330 if (!zswap_configured()) 331 ksft_exit_skip("zswap isn't configured\n"); 332 333 /* 334 * Check that memory controller is available: 335 * memory is listed in cgroup.controllers 336 */ 337 if (cg_read_strstr(root, "cgroup.controllers", "memory")) 338 ksft_exit_skip("memory controller isn't available\n"); 339 340 if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) 341 if (cg_write(root, "cgroup.subtree_control", "+memory")) 342 ksft_exit_skip("Failed to set memory controller\n"); 343 344 for (i = 0; i < ARRAY_SIZE(tests); i++) { 345 switch (tests[i].fn(root)) { 346 case KSFT_PASS: 347 ksft_test_result_pass("%s\n", tests[i].name); 348 break; 349 case KSFT_SKIP: 350 ksft_test_result_skip("%s\n", tests[i].name); 351 break; 352 default: 353 ret = EXIT_FAILURE; 354 ksft_test_result_fail("%s\n", tests[i].name); 355 break; 356 } 357 } 358 359 return ret; 360 } 361