1 #if defined __amd64__ || defined __i386__ 2 /* 3 * Copyright (c) 2022 Alexey Dobriyan <adobriyan@gmail.com> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 /* 18 * Create a process without mappings by unmapping everything at once and 19 * holding it with ptrace(2). See what happens to 20 * 21 * /proc/${pid}/maps 22 * /proc/${pid}/numa_maps 23 * /proc/${pid}/smaps 24 * /proc/${pid}/smaps_rollup 25 */ 26 #undef _GNU_SOURCE 27 #define _GNU_SOURCE 28 29 #undef NDEBUG 30 #include <assert.h> 31 #include <errno.h> 32 #include <stdint.h> 33 #include <stdio.h> 34 #include <stdlib.h> 35 #include <string.h> 36 #include <fcntl.h> 37 #include <sys/mman.h> 38 #include <sys/ptrace.h> 39 #include <sys/resource.h> 40 #include <sys/syscall.h> 41 #include <sys/types.h> 42 #include <sys/wait.h> 43 #include <unistd.h> 44 45 #ifdef __amd64__ 46 #define TEST_VSYSCALL 47 #endif 48 49 #if defined __amd64__ 50 #ifndef SYS_pkey_alloc 51 #define SYS_pkey_alloc 330 52 #endif 53 #ifndef SYS_pkey_free 54 #define SYS_pkey_free 331 55 #endif 56 #elif defined __i386__ 57 #ifndef SYS_pkey_alloc 58 #define SYS_pkey_alloc 381 59 #endif 60 #ifndef SYS_pkey_free 61 #define SYS_pkey_free 382 62 #endif 63 #else 64 #error "SYS_pkey_alloc" 65 #endif 66 67 static int g_protection_key_support; 68 69 static int protection_key_support(void) 70 { 71 long rv = syscall(SYS_pkey_alloc, 0, 0); 72 if (rv > 0) { 73 syscall(SYS_pkey_free, (int)rv); 74 return 1; 75 } else if (rv == -1 && errno == ENOSYS) { 76 return 0; 77 } else if (rv == -1 && errno == EINVAL) { 78 // ospke=n 79 return 0; 80 } else { 81 fprintf(stderr, "%s: error: rv %ld, errno %d\n", __func__, rv, errno); 82 exit(EXIT_FAILURE); 83 } 84 } 85 86 /* 87 * 0: vsyscall VMA doesn't exist vsyscall=none 88 * 1: vsyscall VMA is --xp vsyscall=xonly 89 * 2: vsyscall VMA is r-xp vsyscall=emulate 90 */ 91 static volatile int g_vsyscall; 92 static const char *g_proc_pid_maps_vsyscall; 93 static const char *g_proc_pid_smaps_vsyscall; 94 95 static const char proc_pid_maps_vsyscall_0[] = ""; 96 static const char proc_pid_maps_vsyscall_1[] = 97 "ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n"; 98 static const char proc_pid_maps_vsyscall_2[] = 99 "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n"; 100 101 static const char proc_pid_smaps_vsyscall_0[] = ""; 102 103 static const char proc_pid_smaps_vsyscall_1[] = 104 "ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n" 105 "Size: 4 kB\n" 106 "KernelPageSize: 4 kB\n" 107 "MMUPageSize: 4 kB\n" 108 "Rss: 0 kB\n" 109 "Pss: 0 kB\n" 110 "Pss_Dirty: 0 kB\n" 111 "Shared_Clean: 0 kB\n" 112 "Shared_Dirty: 0 kB\n" 113 "Private_Clean: 0 kB\n" 114 "Private_Dirty: 0 kB\n" 115 "Referenced: 0 kB\n" 116 "Anonymous: 0 kB\n" 117 "KSM: 0 kB\n" 118 "LazyFree: 0 kB\n" 119 "AnonHugePages: 0 kB\n" 120 "ShmemPmdMapped: 0 kB\n" 121 "FilePmdMapped: 0 kB\n" 122 "Shared_Hugetlb: 0 kB\n" 123 "Private_Hugetlb: 0 kB\n" 124 "Swap: 0 kB\n" 125 "SwapPss: 0 kB\n" 126 "Locked: 0 kB\n" 127 "THPeligible: 0\n" 128 ; 129 130 static const char proc_pid_smaps_vsyscall_2[] = 131 "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n" 132 "Size: 4 kB\n" 133 "KernelPageSize: 4 kB\n" 134 "MMUPageSize: 4 kB\n" 135 "Rss: 0 kB\n" 136 "Pss: 0 kB\n" 137 "Pss_Dirty: 0 kB\n" 138 "Shared_Clean: 0 kB\n" 139 "Shared_Dirty: 0 kB\n" 140 "Private_Clean: 0 kB\n" 141 "Private_Dirty: 0 kB\n" 142 "Referenced: 0 kB\n" 143 "Anonymous: 0 kB\n" 144 "KSM: 0 kB\n" 145 "LazyFree: 0 kB\n" 146 "AnonHugePages: 0 kB\n" 147 "ShmemPmdMapped: 0 kB\n" 148 "FilePmdMapped: 0 kB\n" 149 "Shared_Hugetlb: 0 kB\n" 150 "Private_Hugetlb: 0 kB\n" 151 "Swap: 0 kB\n" 152 "SwapPss: 0 kB\n" 153 "Locked: 0 kB\n" 154 "THPeligible: 0\n" 155 ; 156 157 static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___) 158 { 159 _exit(EXIT_FAILURE); 160 } 161 162 #ifdef TEST_VSYSCALL 163 static void sigaction_SIGSEGV_vsyscall(int _, siginfo_t *__, void *___) 164 { 165 _exit(g_vsyscall); 166 } 167 168 /* 169 * vsyscall page can't be unmapped, probe it directly. 170 */ 171 static void vsyscall(void) 172 { 173 pid_t pid; 174 int wstatus; 175 176 pid = fork(); 177 if (pid < 0) { 178 fprintf(stderr, "fork, errno %d\n", errno); 179 exit(1); 180 } 181 if (pid == 0) { 182 setrlimit(RLIMIT_CORE, &(struct rlimit){}); 183 184 /* Hide "segfault at ffffffffff600000" messages. */ 185 struct sigaction act = {}; 186 act.sa_flags = SA_SIGINFO; 187 act.sa_sigaction = sigaction_SIGSEGV_vsyscall; 188 sigaction(SIGSEGV, &act, NULL); 189 190 g_vsyscall = 0; 191 /* gettimeofday(NULL, NULL); */ 192 uint64_t rax = 0xffffffffff600000; 193 asm volatile ( 194 "call *%[rax]" 195 : [rax] "+a" (rax) 196 : "D" (NULL), "S" (NULL) 197 : "rcx", "r11" 198 ); 199 200 g_vsyscall = 1; 201 *(volatile int *)0xffffffffff600000UL; 202 203 g_vsyscall = 2; 204 exit(g_vsyscall); 205 } 206 waitpid(pid, &wstatus, 0); 207 if (WIFEXITED(wstatus)) { 208 g_vsyscall = WEXITSTATUS(wstatus); 209 } else { 210 fprintf(stderr, "error: vsyscall wstatus %08x\n", wstatus); 211 exit(1); 212 } 213 } 214 #endif 215 216 static int test_proc_pid_maps(pid_t pid) 217 { 218 char buf[4096]; 219 snprintf(buf, sizeof(buf), "/proc/%u/maps", pid); 220 int fd = open(buf, O_RDONLY); 221 if (fd == -1) { 222 perror("open /proc/${pid}/maps"); 223 return EXIT_FAILURE; 224 } else { 225 ssize_t rv = read(fd, buf, sizeof(buf)); 226 close(fd); 227 if (g_vsyscall == 0) { 228 assert(rv == 0); 229 } else { 230 size_t len = strlen(g_proc_pid_maps_vsyscall); 231 assert(rv == len); 232 assert(memcmp(buf, g_proc_pid_maps_vsyscall, len) == 0); 233 } 234 return EXIT_SUCCESS; 235 } 236 } 237 238 static int test_proc_pid_numa_maps(pid_t pid) 239 { 240 char buf[4096]; 241 snprintf(buf, sizeof(buf), "/proc/%u/numa_maps", pid); 242 int fd = open(buf, O_RDONLY); 243 if (fd == -1) { 244 if (errno == ENOENT) { 245 /* 246 * /proc/${pid}/numa_maps is under CONFIG_NUMA, 247 * it doesn't necessarily exist. 248 */ 249 return EXIT_SUCCESS; 250 } 251 perror("open /proc/${pid}/numa_maps"); 252 return EXIT_FAILURE; 253 } else { 254 ssize_t rv = read(fd, buf, sizeof(buf)); 255 close(fd); 256 assert(rv == 0); 257 return EXIT_SUCCESS; 258 } 259 } 260 261 static int test_proc_pid_smaps(pid_t pid) 262 { 263 char buf[4096]; 264 snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid); 265 int fd = open(buf, O_RDONLY); 266 if (fd == -1) { 267 if (errno == ENOENT) { 268 /* 269 * /proc/${pid}/smaps is under CONFIG_PROC_PAGE_MONITOR, 270 * it doesn't necessarily exist. 271 */ 272 return EXIT_SUCCESS; 273 } 274 perror("open /proc/${pid}/smaps"); 275 return EXIT_FAILURE; 276 } 277 ssize_t rv = read(fd, buf, sizeof(buf)); 278 close(fd); 279 280 assert(0 <= rv); 281 assert(rv <= sizeof(buf)); 282 283 if (g_vsyscall == 0) { 284 assert(rv == 0); 285 } else { 286 size_t len = strlen(g_proc_pid_smaps_vsyscall); 287 assert(rv > len); 288 assert(memcmp(buf, g_proc_pid_smaps_vsyscall, len) == 0); 289 290 if (g_protection_key_support) { 291 #define PROTECTION_KEY "ProtectionKey: 0\n" 292 assert(memmem(buf, rv, PROTECTION_KEY, strlen(PROTECTION_KEY))); 293 } 294 } 295 296 return EXIT_SUCCESS; 297 } 298 299 static const char g_smaps_rollup[] = 300 "00000000-00000000 ---p 00000000 00:00 0 [rollup]\n" 301 "Rss: 0 kB\n" 302 "Pss: 0 kB\n" 303 "Pss_Dirty: 0 kB\n" 304 "Pss_Anon: 0 kB\n" 305 "Pss_File: 0 kB\n" 306 "Pss_Shmem: 0 kB\n" 307 "Shared_Clean: 0 kB\n" 308 "Shared_Dirty: 0 kB\n" 309 "Private_Clean: 0 kB\n" 310 "Private_Dirty: 0 kB\n" 311 "Referenced: 0 kB\n" 312 "Anonymous: 0 kB\n" 313 "KSM: 0 kB\n" 314 "LazyFree: 0 kB\n" 315 "AnonHugePages: 0 kB\n" 316 "ShmemPmdMapped: 0 kB\n" 317 "FilePmdMapped: 0 kB\n" 318 "Shared_Hugetlb: 0 kB\n" 319 "Private_Hugetlb: 0 kB\n" 320 "Swap: 0 kB\n" 321 "SwapPss: 0 kB\n" 322 "Locked: 0 kB\n" 323 ; 324 325 static int test_proc_pid_smaps_rollup(pid_t pid) 326 { 327 char buf[4096]; 328 snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid); 329 int fd = open(buf, O_RDONLY); 330 if (fd == -1) { 331 if (errno == ENOENT) { 332 /* 333 * /proc/${pid}/smaps_rollup is under CONFIG_PROC_PAGE_MONITOR, 334 * it doesn't necessarily exist. 335 */ 336 return EXIT_SUCCESS; 337 } 338 perror("open /proc/${pid}/smaps_rollup"); 339 return EXIT_FAILURE; 340 } else { 341 ssize_t rv = read(fd, buf, sizeof(buf)); 342 close(fd); 343 assert(rv == sizeof(g_smaps_rollup) - 1); 344 assert(memcmp(buf, g_smaps_rollup, sizeof(g_smaps_rollup) - 1) == 0); 345 return EXIT_SUCCESS; 346 } 347 } 348 349 static const char *parse_u64(const char *p, const char *const end, uint64_t *rv) 350 { 351 *rv = 0; 352 for (; p != end; p += 1) { 353 if ('0' <= *p && *p <= '9') { 354 assert(!__builtin_mul_overflow(*rv, 10, rv)); 355 assert(!__builtin_add_overflow(*rv, *p - '0', rv)); 356 } else { 357 break; 358 } 359 } 360 assert(p != end); 361 return p; 362 } 363 364 /* 365 * There seems to be 2 types of valid output: 366 * "0 A A B 0 0 0\n" for dynamic exeuctables, 367 * "0 0 0 B 0 0 0\n" for static executables. 368 */ 369 static int test_proc_pid_statm(pid_t pid) 370 { 371 char buf[4096]; 372 snprintf(buf, sizeof(buf), "/proc/%u/statm", pid); 373 int fd = open(buf, O_RDONLY); 374 if (fd == -1) { 375 perror("open /proc/${pid}/statm"); 376 return EXIT_FAILURE; 377 } 378 379 ssize_t rv = read(fd, buf, sizeof(buf)); 380 close(fd); 381 382 assert(rv >= 0); 383 assert(rv <= sizeof(buf)); 384 385 const char *p = buf; 386 const char *const end = p + rv; 387 388 /* size */ 389 assert(p != end && *p++ == '0'); 390 assert(p != end && *p++ == ' '); 391 392 uint64_t resident; 393 p = parse_u64(p, end, &resident); 394 assert(p != end && *p++ == ' '); 395 396 uint64_t shared; 397 p = parse_u64(p, end, &shared); 398 assert(p != end && *p++ == ' '); 399 400 uint64_t text; 401 p = parse_u64(p, end, &text); 402 assert(p != end && *p++ == ' '); 403 404 assert(p != end && *p++ == '0'); 405 assert(p != end && *p++ == ' '); 406 407 /* data */ 408 assert(p != end && *p++ == '0'); 409 assert(p != end && *p++ == ' '); 410 411 assert(p != end && *p++ == '0'); 412 assert(p != end && *p++ == '\n'); 413 414 assert(p == end); 415 416 /* 417 * "text" is "mm->end_code - mm->start_code" at execve(2) time. 418 * munmap() doesn't change it. It can be anything (just link 419 * statically). It can't be 0 because executing to this point 420 * implies at least 1 page of code. 421 */ 422 assert(text > 0); 423 424 /* 425 * These two are always equal. Always 0 for statically linked 426 * executables and sometimes 0 for dynamically linked executables. 427 * There is no way to tell one from another without parsing ELF 428 * which is too much for this test. 429 */ 430 assert(resident == shared); 431 432 return EXIT_SUCCESS; 433 } 434 435 int main(void) 436 { 437 int rv = EXIT_SUCCESS; 438 439 #ifdef TEST_VSYSCALL 440 vsyscall(); 441 #endif 442 443 switch (g_vsyscall) { 444 case 0: 445 g_proc_pid_maps_vsyscall = proc_pid_maps_vsyscall_0; 446 g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_0; 447 break; 448 case 1: 449 g_proc_pid_maps_vsyscall = proc_pid_maps_vsyscall_1; 450 g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_1; 451 break; 452 case 2: 453 g_proc_pid_maps_vsyscall = proc_pid_maps_vsyscall_2; 454 g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_2; 455 break; 456 default: 457 abort(); 458 } 459 460 g_protection_key_support = protection_key_support(); 461 462 pid_t pid = fork(); 463 if (pid == -1) { 464 perror("fork"); 465 return EXIT_FAILURE; 466 } else if (pid == 0) { 467 rv = ptrace(PTRACE_TRACEME, 0, NULL, NULL); 468 if (rv != 0) { 469 if (errno == EPERM) { 470 fprintf(stderr, 471 "Did you know? ptrace(PTRACE_TRACEME) doesn't work under strace.\n" 472 ); 473 kill(getppid(), SIGTERM); 474 return EXIT_FAILURE; 475 } 476 perror("ptrace PTRACE_TRACEME"); 477 return EXIT_FAILURE; 478 } 479 480 /* 481 * Hide "segfault at ..." messages. Signal handler won't run. 482 */ 483 struct sigaction act = {}; 484 act.sa_flags = SA_SIGINFO; 485 act.sa_sigaction = sigaction_SIGSEGV; 486 sigaction(SIGSEGV, &act, NULL); 487 488 #ifdef __amd64__ 489 munmap(NULL, ((size_t)1 << 47) - 4096); 490 #elif defined __i386__ 491 { 492 size_t len; 493 494 for (len = -4096;; len -= 4096) { 495 munmap(NULL, len); 496 } 497 } 498 #else 499 #error "implement 'unmap everything'" 500 #endif 501 return EXIT_FAILURE; 502 } else { 503 /* 504 * TODO find reliable way to signal parent that munmap(2) completed. 505 * Child can't do it directly because it effectively doesn't exist 506 * anymore. Looking at child's VM files isn't 100% reliable either: 507 * due to a bug they may not become empty or empty-like. 508 */ 509 sleep(1); 510 511 if (rv == EXIT_SUCCESS) { 512 rv = test_proc_pid_maps(pid); 513 } 514 if (rv == EXIT_SUCCESS) { 515 rv = test_proc_pid_numa_maps(pid); 516 } 517 if (rv == EXIT_SUCCESS) { 518 rv = test_proc_pid_smaps(pid); 519 } 520 if (rv == EXIT_SUCCESS) { 521 rv = test_proc_pid_smaps_rollup(pid); 522 } 523 if (rv == EXIT_SUCCESS) { 524 rv = test_proc_pid_statm(pid); 525 } 526 527 /* Cut the rope. */ 528 int wstatus; 529 waitpid(pid, &wstatus, 0); 530 assert(WIFSTOPPED(wstatus)); 531 assert(WSTOPSIG(wstatus) == SIGSEGV); 532 } 533 534 return rv; 535 } 536 #else 537 int main(void) 538 { 539 return 4; 540 } 541 #endif 542