1 // SPDX-License-Identifier: GPL-2.0 2 #define _GNU_SOURCE 3 4 #include <linux/limits.h> 5 #include <fcntl.h> 6 #include <stdio.h> 7 #include <stdlib.h> 8 #include <string.h> 9 #include <sys/stat.h> 10 #include <sys/types.h> 11 #include <unistd.h> 12 #include <sys/wait.h> 13 #include <errno.h> 14 #include <sys/sysinfo.h> 15 #include <pthread.h> 16 17 #include "../kselftest.h" 18 #include "cgroup_util.h" 19 20 21 static int alloc_dcache(const char *cgroup, void *arg) 22 { 23 unsigned long i; 24 struct stat st; 25 char buf[128]; 26 27 for (i = 0; i < (unsigned long)arg; i++) { 28 snprintf(buf, sizeof(buf), 29 "/something-non-existent-with-a-long-name-%64lu-%d", 30 i, getpid()); 31 stat(buf, &st); 32 } 33 34 return 0; 35 } 36 37 /* 38 * This test allocates 100000 of negative dentries with long names. 39 * Then it checks that "slab" in memory.stat is larger than 1M. 40 * Then it sets memory.high to 1M and checks that at least 1/2 41 * of slab memory has been reclaimed. 42 */ 43 static int test_kmem_basic(const char *root) 44 { 45 int ret = KSFT_FAIL; 46 char *cg = NULL; 47 long slab0, slab1, current; 48 49 cg = cg_name(root, "kmem_basic_test"); 50 if (!cg) 51 goto cleanup; 52 53 if (cg_create(cg)) 54 goto cleanup; 55 56 if (cg_run(cg, alloc_dcache, (void *)100000)) 57 goto cleanup; 58 59 slab0 = cg_read_key_long(cg, "memory.stat", "slab "); 60 if (slab0 < (1 << 20)) 61 goto cleanup; 62 63 cg_write(cg, "memory.high", "1M"); 64 slab1 = cg_read_key_long(cg, "memory.stat", "slab "); 65 if (slab1 <= 0) 66 goto cleanup; 67 68 current = cg_read_long(cg, "memory.current"); 69 if (current <= 0) 70 goto cleanup; 71 72 if (slab1 < slab0 / 2 && current < slab0 / 2) 73 ret = KSFT_PASS; 74 cleanup: 75 cg_destroy(cg); 76 free(cg); 77 78 return ret; 79 } 80 81 static void *alloc_kmem_fn(void *arg) 82 { 83 alloc_dcache(NULL, (void *)100); 84 return NULL; 85 } 86 87 static int alloc_kmem_smp(const char *cgroup, void *arg) 88 { 89 int nr_threads = 2 * get_nprocs(); 90 pthread_t *tinfo; 91 unsigned long i; 92 int ret = -1; 93 94 tinfo = calloc(nr_threads, sizeof(pthread_t)); 95 if (tinfo == NULL) 96 return -1; 97 98 for (i = 0; i < nr_threads; i++) { 99 if (pthread_create(&tinfo[i], NULL, &alloc_kmem_fn, 100 (void *)i)) { 101 free(tinfo); 102 return -1; 103 } 104 } 105 106 for (i = 0; i < nr_threads; i++) { 107 ret = pthread_join(tinfo[i], NULL); 108 if (ret) 109 break; 110 } 111 112 free(tinfo); 113 return ret; 114 } 115 116 static int cg_run_in_subcgroups(const char *parent, 117 int (*fn)(const char *cgroup, void *arg), 118 void *arg, int times) 119 { 120 char *child; 121 int i; 122 123 for (i = 0; i < times; i++) { 124 child = cg_name_indexed(parent, "child", i); 125 if (!child) 126 return -1; 127 128 if (cg_create(child)) { 129 cg_destroy(child); 130 free(child); 131 return -1; 132 } 133 134 if (cg_run(child, fn, NULL)) { 135 cg_destroy(child); 136 free(child); 137 return -1; 138 } 139 140 cg_destroy(child); 141 free(child); 142 } 143 144 return 0; 145 } 146 147 /* 148 * The test creates and destroys a large number of cgroups. In each cgroup it 149 * allocates some slab memory (mostly negative dentries) using 2 * NR_CPUS 150 * threads. Then it checks the sanity of numbers on the parent level: 151 * the total size of the cgroups should be roughly equal to 152 * anon + file + slab + kernel_stack. 153 */ 154 static int test_kmem_memcg_deletion(const char *root) 155 { 156 long current, slab, anon, file, kernel_stack, sum; 157 int ret = KSFT_FAIL; 158 char *parent; 159 160 parent = cg_name(root, "kmem_memcg_deletion_test"); 161 if (!parent) 162 goto cleanup; 163 164 if (cg_create(parent)) 165 goto cleanup; 166 167 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 168 goto cleanup; 169 170 if (cg_run_in_subcgroups(parent, alloc_kmem_smp, NULL, 100)) 171 goto cleanup; 172 173 current = cg_read_long(parent, "memory.current"); 174 slab = cg_read_key_long(parent, "memory.stat", "slab "); 175 anon = cg_read_key_long(parent, "memory.stat", "anon "); 176 file = cg_read_key_long(parent, "memory.stat", "file "); 177 kernel_stack = cg_read_key_long(parent, "memory.stat", "kernel_stack "); 178 if (current < 0 || slab < 0 || anon < 0 || file < 0 || 179 kernel_stack < 0) 180 goto cleanup; 181 182 sum = slab + anon + file + kernel_stack; 183 if (abs(sum - current) < 4096 * 32 * 2 * get_nprocs()) { 184 ret = KSFT_PASS; 185 } else { 186 printf("memory.current = %ld\n", current); 187 printf("slab + anon + file + kernel_stack = %ld\n", sum); 188 printf("slab = %ld\n", slab); 189 printf("anon = %ld\n", anon); 190 printf("file = %ld\n", file); 191 printf("kernel_stack = %ld\n", kernel_stack); 192 } 193 194 cleanup: 195 cg_destroy(parent); 196 free(parent); 197 198 return ret; 199 } 200 201 /* 202 * The test reads the entire /proc/kpagecgroup. If the operation went 203 * successfully (and the kernel didn't panic), the test is treated as passed. 204 */ 205 static int test_kmem_proc_kpagecgroup(const char *root) 206 { 207 unsigned long buf[128]; 208 int ret = KSFT_FAIL; 209 ssize_t len; 210 int fd; 211 212 fd = open("/proc/kpagecgroup", O_RDONLY); 213 if (fd < 0) 214 return ret; 215 216 do { 217 len = read(fd, buf, sizeof(buf)); 218 } while (len > 0); 219 220 if (len == 0) 221 ret = KSFT_PASS; 222 223 close(fd); 224 return ret; 225 } 226 227 static void *pthread_wait_fn(void *arg) 228 { 229 sleep(100); 230 return NULL; 231 } 232 233 static int spawn_1000_threads(const char *cgroup, void *arg) 234 { 235 int nr_threads = 1000; 236 pthread_t *tinfo; 237 unsigned long i; 238 long stack; 239 int ret = -1; 240 241 tinfo = calloc(nr_threads, sizeof(pthread_t)); 242 if (tinfo == NULL) 243 return -1; 244 245 for (i = 0; i < nr_threads; i++) { 246 if (pthread_create(&tinfo[i], NULL, &pthread_wait_fn, 247 (void *)i)) { 248 free(tinfo); 249 return(-1); 250 } 251 } 252 253 stack = cg_read_key_long(cgroup, "memory.stat", "kernel_stack "); 254 if (stack >= 4096 * 1000) 255 ret = 0; 256 257 free(tinfo); 258 return ret; 259 } 260 261 /* 262 * The test spawns a process, which spawns 1000 threads. Then it checks 263 * that memory.stat's kernel_stack is at least 1000 pages large. 264 */ 265 static int test_kmem_kernel_stacks(const char *root) 266 { 267 int ret = KSFT_FAIL; 268 char *cg = NULL; 269 270 cg = cg_name(root, "kmem_kernel_stacks_test"); 271 if (!cg) 272 goto cleanup; 273 274 if (cg_create(cg)) 275 goto cleanup; 276 277 if (cg_run(cg, spawn_1000_threads, NULL)) 278 goto cleanup; 279 280 ret = KSFT_PASS; 281 cleanup: 282 cg_destroy(cg); 283 free(cg); 284 285 return ret; 286 } 287 288 /* 289 * This test sequentionally creates 30 child cgroups, allocates some 290 * kernel memory in each of them, and deletes them. Then it checks 291 * that the number of dying cgroups on the parent level is 0. 292 */ 293 static int test_kmem_dead_cgroups(const char *root) 294 { 295 int ret = KSFT_FAIL; 296 char *parent; 297 long dead; 298 int i; 299 300 parent = cg_name(root, "kmem_dead_cgroups_test"); 301 if (!parent) 302 goto cleanup; 303 304 if (cg_create(parent)) 305 goto cleanup; 306 307 if (cg_write(parent, "cgroup.subtree_control", "+memory")) 308 goto cleanup; 309 310 if (cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30)) 311 goto cleanup; 312 313 for (i = 0; i < 5; i++) { 314 dead = cg_read_key_long(parent, "cgroup.stat", 315 "nr_dying_descendants "); 316 if (dead == 0) { 317 ret = KSFT_PASS; 318 break; 319 } 320 /* 321 * Reclaiming cgroups might take some time, 322 * let's wait a bit and repeat. 323 */ 324 sleep(1); 325 } 326 327 cleanup: 328 cg_destroy(parent); 329 free(parent); 330 331 return ret; 332 } 333 334 #define T(x) { x, #x } 335 struct kmem_test { 336 int (*fn)(const char *root); 337 const char *name; 338 } tests[] = { 339 T(test_kmem_basic), 340 T(test_kmem_memcg_deletion), 341 T(test_kmem_proc_kpagecgroup), 342 T(test_kmem_kernel_stacks), 343 T(test_kmem_dead_cgroups), 344 }; 345 #undef T 346 347 int main(int argc, char **argv) 348 { 349 char root[PATH_MAX]; 350 int i, ret = EXIT_SUCCESS; 351 352 if (cg_find_unified_root(root, sizeof(root))) 353 ksft_exit_skip("cgroup v2 isn't mounted\n"); 354 355 /* 356 * Check that memory controller is available: 357 * memory is listed in cgroup.controllers 358 */ 359 if (cg_read_strstr(root, "cgroup.controllers", "memory")) 360 ksft_exit_skip("memory controller isn't available\n"); 361 362 if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) 363 if (cg_write(root, "cgroup.subtree_control", "+memory")) 364 ksft_exit_skip("Failed to set memory controller\n"); 365 366 for (i = 0; i < ARRAY_SIZE(tests); i++) { 367 switch (tests[i].fn(root)) { 368 case KSFT_PASS: 369 ksft_test_result_pass("%s\n", tests[i].name); 370 break; 371 case KSFT_SKIP: 372 ksft_test_result_skip("%s\n", tests[i].name); 373 break; 374 default: 375 ret = EXIT_FAILURE; 376 ksft_test_result_fail("%s\n", tests[i].name); 377 break; 378 } 379 } 380 381 return ret; 382 } 383