1 #if defined __amd64__ || defined __i386__ 2 /* 3 * Copyright (c) 2022 Alexey Dobriyan <adobriyan@gmail.com> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 /* 18 * Create a process without mappings by unmapping everything at once and 19 * holding it with ptrace(2). See what happens to 20 * 21 * /proc/${pid}/maps 22 * /proc/${pid}/numa_maps 23 * /proc/${pid}/smaps 24 * /proc/${pid}/smaps_rollup 25 */ 26 #undef NDEBUG 27 #include <assert.h> 28 #include <errno.h> 29 #include <stdint.h> 30 #include <stdio.h> 31 #include <stdlib.h> 32 #include <string.h> 33 #include <fcntl.h> 34 #include <sys/mman.h> 35 #include <sys/ptrace.h> 36 #include <sys/resource.h> 37 #include <sys/types.h> 38 #include <sys/wait.h> 39 #include <unistd.h> 40 41 #ifdef __amd64__ 42 #define TEST_VSYSCALL 43 #endif 44 45 /* 46 * 0: vsyscall VMA doesn't exist vsyscall=none 47 * 1: vsyscall VMA is --xp vsyscall=xonly 48 * 2: vsyscall VMA is r-xp vsyscall=emulate 49 */ 50 static volatile int g_vsyscall; 51 static const char *g_proc_pid_maps_vsyscall; 52 static const char *g_proc_pid_smaps_vsyscall; 53 54 static const char proc_pid_maps_vsyscall_0[] = ""; 55 static const char proc_pid_maps_vsyscall_1[] = 56 "ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n"; 57 static const char proc_pid_maps_vsyscall_2[] = 58 "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n"; 59 60 static const char proc_pid_smaps_vsyscall_0[] = ""; 61 62 static const char proc_pid_smaps_vsyscall_1[] = 63 "ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n" 64 "Size: 4 kB\n" 65 "KernelPageSize: 4 kB\n" 66 "MMUPageSize: 4 kB\n" 67 "Rss: 0 kB\n" 68 "Pss: 0 kB\n" 69 "Pss_Dirty: 0 kB\n" 70 "Shared_Clean: 0 kB\n" 71 "Shared_Dirty: 0 kB\n" 72 "Private_Clean: 0 kB\n" 73 "Private_Dirty: 0 kB\n" 74 "Referenced: 0 kB\n" 75 "Anonymous: 0 kB\n" 76 "KSM: 0 kB\n" 77 "LazyFree: 0 kB\n" 78 "AnonHugePages: 0 kB\n" 79 "ShmemPmdMapped: 0 kB\n" 80 "FilePmdMapped: 0 kB\n" 81 "Shared_Hugetlb: 0 kB\n" 82 "Private_Hugetlb: 0 kB\n" 83 "Swap: 0 kB\n" 84 "SwapPss: 0 kB\n" 85 "Locked: 0 kB\n" 86 "THPeligible: 0\n" 87 /* 88 * "ProtectionKey:" field is conditional. It is possible to check it as well, 89 * but I don't have such machine. 90 */ 91 ; 92 93 static const char proc_pid_smaps_vsyscall_2[] = 94 "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n" 95 "Size: 4 kB\n" 96 "KernelPageSize: 4 kB\n" 97 "MMUPageSize: 4 kB\n" 98 "Rss: 0 kB\n" 99 "Pss: 0 kB\n" 100 "Pss_Dirty: 0 kB\n" 101 "Shared_Clean: 0 kB\n" 102 "Shared_Dirty: 0 kB\n" 103 "Private_Clean: 0 kB\n" 104 "Private_Dirty: 0 kB\n" 105 "Referenced: 0 kB\n" 106 "Anonymous: 0 kB\n" 107 "KSM: 0 kB\n" 108 "LazyFree: 0 kB\n" 109 "AnonHugePages: 0 kB\n" 110 "ShmemPmdMapped: 0 kB\n" 111 "FilePmdMapped: 0 kB\n" 112 "Shared_Hugetlb: 0 kB\n" 113 "Private_Hugetlb: 0 kB\n" 114 "Swap: 0 kB\n" 115 "SwapPss: 0 kB\n" 116 "Locked: 0 kB\n" 117 "THPeligible: 0\n" 118 /* 119 * "ProtectionKey:" field is conditional. It is possible to check it as well, 120 * but I'm too tired. 121 */ 122 ; 123 124 static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___) 125 { 126 _exit(EXIT_FAILURE); 127 } 128 129 #ifdef TEST_VSYSCALL 130 static void sigaction_SIGSEGV_vsyscall(int _, siginfo_t *__, void *___) 131 { 132 _exit(g_vsyscall); 133 } 134 135 /* 136 * vsyscall page can't be unmapped, probe it directly. 137 */ 138 static void vsyscall(void) 139 { 140 pid_t pid; 141 int wstatus; 142 143 pid = fork(); 144 if (pid < 0) { 145 fprintf(stderr, "fork, errno %d\n", errno); 146 exit(1); 147 } 148 if (pid == 0) { 149 setrlimit(RLIMIT_CORE, &(struct rlimit){}); 150 151 /* Hide "segfault at ffffffffff600000" messages. */ 152 struct sigaction act = {}; 153 act.sa_flags = SA_SIGINFO; 154 act.sa_sigaction = sigaction_SIGSEGV_vsyscall; 155 sigaction(SIGSEGV, &act, NULL); 156 157 g_vsyscall = 0; 158 /* gettimeofday(NULL, NULL); */ 159 uint64_t rax = 0xffffffffff600000; 160 asm volatile ( 161 "call *%[rax]" 162 : [rax] "+a" (rax) 163 : "D" (NULL), "S" (NULL) 164 : "rcx", "r11" 165 ); 166 167 g_vsyscall = 1; 168 *(volatile int *)0xffffffffff600000UL; 169 170 g_vsyscall = 2; 171 exit(g_vsyscall); 172 } 173 waitpid(pid, &wstatus, 0); 174 if (WIFEXITED(wstatus)) { 175 g_vsyscall = WEXITSTATUS(wstatus); 176 } else { 177 fprintf(stderr, "error: vsyscall wstatus %08x\n", wstatus); 178 exit(1); 179 } 180 } 181 #endif 182 183 static int test_proc_pid_maps(pid_t pid) 184 { 185 char buf[4096]; 186 snprintf(buf, sizeof(buf), "/proc/%u/maps", pid); 187 int fd = open(buf, O_RDONLY); 188 if (fd == -1) { 189 perror("open /proc/${pid}/maps"); 190 return EXIT_FAILURE; 191 } else { 192 ssize_t rv = read(fd, buf, sizeof(buf)); 193 close(fd); 194 if (g_vsyscall == 0) { 195 assert(rv == 0); 196 } else { 197 size_t len = strlen(g_proc_pid_maps_vsyscall); 198 assert(rv == len); 199 assert(memcmp(buf, g_proc_pid_maps_vsyscall, len) == 0); 200 } 201 return EXIT_SUCCESS; 202 } 203 } 204 205 static int test_proc_pid_numa_maps(pid_t pid) 206 { 207 char buf[4096]; 208 snprintf(buf, sizeof(buf), "/proc/%u/numa_maps", pid); 209 int fd = open(buf, O_RDONLY); 210 if (fd == -1) { 211 if (errno == ENOENT) { 212 /* 213 * /proc/${pid}/numa_maps is under CONFIG_NUMA, 214 * it doesn't necessarily exist. 215 */ 216 return EXIT_SUCCESS; 217 } 218 perror("open /proc/${pid}/numa_maps"); 219 return EXIT_FAILURE; 220 } else { 221 ssize_t rv = read(fd, buf, sizeof(buf)); 222 close(fd); 223 assert(rv == 0); 224 return EXIT_SUCCESS; 225 } 226 } 227 228 static int test_proc_pid_smaps(pid_t pid) 229 { 230 char buf[4096]; 231 snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid); 232 int fd = open(buf, O_RDONLY); 233 if (fd == -1) { 234 if (errno == ENOENT) { 235 /* 236 * /proc/${pid}/smaps is under CONFIG_PROC_PAGE_MONITOR, 237 * it doesn't necessarily exist. 238 */ 239 return EXIT_SUCCESS; 240 } 241 perror("open /proc/${pid}/smaps"); 242 return EXIT_FAILURE; 243 } else { 244 ssize_t rv = read(fd, buf, sizeof(buf)); 245 close(fd); 246 if (g_vsyscall == 0) { 247 assert(rv == 0); 248 } else { 249 size_t len = strlen(g_proc_pid_smaps_vsyscall); 250 /* TODO "ProtectionKey:" */ 251 assert(rv > len); 252 assert(memcmp(buf, g_proc_pid_smaps_vsyscall, len) == 0); 253 } 254 return EXIT_SUCCESS; 255 } 256 } 257 258 static const char g_smaps_rollup[] = 259 "00000000-00000000 ---p 00000000 00:00 0 [rollup]\n" 260 "Rss: 0 kB\n" 261 "Pss: 0 kB\n" 262 "Pss_Dirty: 0 kB\n" 263 "Pss_Anon: 0 kB\n" 264 "Pss_File: 0 kB\n" 265 "Pss_Shmem: 0 kB\n" 266 "Shared_Clean: 0 kB\n" 267 "Shared_Dirty: 0 kB\n" 268 "Private_Clean: 0 kB\n" 269 "Private_Dirty: 0 kB\n" 270 "Referenced: 0 kB\n" 271 "Anonymous: 0 kB\n" 272 "KSM: 0 kB\n" 273 "LazyFree: 0 kB\n" 274 "AnonHugePages: 0 kB\n" 275 "ShmemPmdMapped: 0 kB\n" 276 "FilePmdMapped: 0 kB\n" 277 "Shared_Hugetlb: 0 kB\n" 278 "Private_Hugetlb: 0 kB\n" 279 "Swap: 0 kB\n" 280 "SwapPss: 0 kB\n" 281 "Locked: 0 kB\n" 282 ; 283 284 static int test_proc_pid_smaps_rollup(pid_t pid) 285 { 286 char buf[4096]; 287 snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid); 288 int fd = open(buf, O_RDONLY); 289 if (fd == -1) { 290 if (errno == ENOENT) { 291 /* 292 * /proc/${pid}/smaps_rollup is under CONFIG_PROC_PAGE_MONITOR, 293 * it doesn't necessarily exist. 294 */ 295 return EXIT_SUCCESS; 296 } 297 perror("open /proc/${pid}/smaps_rollup"); 298 return EXIT_FAILURE; 299 } else { 300 ssize_t rv = read(fd, buf, sizeof(buf)); 301 close(fd); 302 assert(rv == sizeof(g_smaps_rollup) - 1); 303 assert(memcmp(buf, g_smaps_rollup, sizeof(g_smaps_rollup) - 1) == 0); 304 return EXIT_SUCCESS; 305 } 306 } 307 308 static const char *parse_u64(const char *p, const char *const end, uint64_t *rv) 309 { 310 *rv = 0; 311 for (; p != end; p += 1) { 312 if ('0' <= *p && *p <= '9') { 313 assert(!__builtin_mul_overflow(*rv, 10, rv)); 314 assert(!__builtin_add_overflow(*rv, *p - '0', rv)); 315 } else { 316 break; 317 } 318 } 319 assert(p != end); 320 return p; 321 } 322 323 /* 324 * There seems to be 2 types of valid output: 325 * "0 A A B 0 0 0\n" for dynamic exeuctables, 326 * "0 0 0 B 0 0 0\n" for static executables. 327 */ 328 static int test_proc_pid_statm(pid_t pid) 329 { 330 char buf[4096]; 331 snprintf(buf, sizeof(buf), "/proc/%u/statm", pid); 332 int fd = open(buf, O_RDONLY); 333 if (fd == -1) { 334 perror("open /proc/${pid}/statm"); 335 return EXIT_FAILURE; 336 } 337 338 ssize_t rv = read(fd, buf, sizeof(buf)); 339 close(fd); 340 341 assert(rv >= 0); 342 assert(rv <= sizeof(buf)); 343 if (0) { 344 write(1, buf, rv); 345 } 346 347 const char *p = buf; 348 const char *const end = p + rv; 349 350 /* size */ 351 assert(p != end && *p++ == '0'); 352 assert(p != end && *p++ == ' '); 353 354 uint64_t resident; 355 p = parse_u64(p, end, &resident); 356 assert(p != end && *p++ == ' '); 357 358 uint64_t shared; 359 p = parse_u64(p, end, &shared); 360 assert(p != end && *p++ == ' '); 361 362 uint64_t text; 363 p = parse_u64(p, end, &text); 364 assert(p != end && *p++ == ' '); 365 366 assert(p != end && *p++ == '0'); 367 assert(p != end && *p++ == ' '); 368 369 /* data */ 370 assert(p != end && *p++ == '0'); 371 assert(p != end && *p++ == ' '); 372 373 assert(p != end && *p++ == '0'); 374 assert(p != end && *p++ == '\n'); 375 376 assert(p == end); 377 378 /* 379 * "text" is "mm->end_code - mm->start_code" at execve(2) time. 380 * munmap() doesn't change it. It can be anything (just link 381 * statically). It can't be 0 because executing to this point 382 * implies at least 1 page of code. 383 */ 384 assert(text > 0); 385 386 /* 387 * These two are always equal. Always 0 for statically linked 388 * executables and sometimes 0 for dynamically linked executables. 389 * There is no way to tell one from another without parsing ELF 390 * which is too much for this test. 391 */ 392 assert(resident == shared); 393 394 return EXIT_SUCCESS; 395 } 396 397 int main(void) 398 { 399 int rv = EXIT_SUCCESS; 400 401 #ifdef TEST_VSYSCALL 402 vsyscall(); 403 #endif 404 405 switch (g_vsyscall) { 406 case 0: 407 g_proc_pid_maps_vsyscall = proc_pid_maps_vsyscall_0; 408 g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_0; 409 break; 410 case 1: 411 g_proc_pid_maps_vsyscall = proc_pid_maps_vsyscall_1; 412 g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_1; 413 break; 414 case 2: 415 g_proc_pid_maps_vsyscall = proc_pid_maps_vsyscall_2; 416 g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_2; 417 break; 418 default: 419 abort(); 420 } 421 422 pid_t pid = fork(); 423 if (pid == -1) { 424 perror("fork"); 425 return EXIT_FAILURE; 426 } else if (pid == 0) { 427 rv = ptrace(PTRACE_TRACEME, 0, NULL, NULL); 428 if (rv != 0) { 429 if (errno == EPERM) { 430 fprintf(stderr, 431 "Did you know? ptrace(PTRACE_TRACEME) doesn't work under strace.\n" 432 ); 433 kill(getppid(), SIGTERM); 434 return EXIT_FAILURE; 435 } 436 perror("ptrace PTRACE_TRACEME"); 437 return EXIT_FAILURE; 438 } 439 440 /* 441 * Hide "segfault at ..." messages. Signal handler won't run. 442 */ 443 struct sigaction act = {}; 444 act.sa_flags = SA_SIGINFO; 445 act.sa_sigaction = sigaction_SIGSEGV; 446 sigaction(SIGSEGV, &act, NULL); 447 448 #ifdef __amd64__ 449 munmap(NULL, ((size_t)1 << 47) - 4096); 450 #elif defined __i386__ 451 { 452 size_t len; 453 454 for (len = -4096;; len -= 4096) { 455 munmap(NULL, len); 456 } 457 } 458 #else 459 #error "implement 'unmap everything'" 460 #endif 461 return EXIT_FAILURE; 462 } else { 463 /* 464 * TODO find reliable way to signal parent that munmap(2) completed. 465 * Child can't do it directly because it effectively doesn't exist 466 * anymore. Looking at child's VM files isn't 100% reliable either: 467 * due to a bug they may not become empty or empty-like. 468 */ 469 sleep(1); 470 471 if (rv == EXIT_SUCCESS) { 472 rv = test_proc_pid_maps(pid); 473 } 474 if (rv == EXIT_SUCCESS) { 475 rv = test_proc_pid_numa_maps(pid); 476 } 477 if (rv == EXIT_SUCCESS) { 478 rv = test_proc_pid_smaps(pid); 479 } 480 if (rv == EXIT_SUCCESS) { 481 rv = test_proc_pid_smaps_rollup(pid); 482 } 483 if (rv == EXIT_SUCCESS) { 484 rv = test_proc_pid_statm(pid); 485 } 486 487 /* Cut the rope. */ 488 int wstatus; 489 waitpid(pid, &wstatus, 0); 490 assert(WIFSTOPPED(wstatus)); 491 assert(WSTOPSIG(wstatus) == SIGSEGV); 492 } 493 494 return rv; 495 } 496 #else 497 int main(void) 498 { 499 return 4; 500 } 501 #endif 502