1 /* 2 * Copyright (c) 2019 Alexey Dobriyan <adobriyan@gmail.com> 3 * 4 * Permission to use, copy, modify, and distribute this software for any 5 * purpose with or without fee is hereby granted, provided that the above 6 * copyright notice and this permission notice appear in all copies. 7 * 8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 */ 16 /* 17 * Fork and exec tiny 1 page executable which precisely controls its VM. 18 * Test /proc/$PID/maps 19 * Test /proc/$PID/smaps 20 * Test /proc/$PID/smaps_rollup 21 * Test /proc/$PID/statm 22 * 23 * FIXME require CONFIG_TMPFS which can be disabled 24 * FIXME test other values from "smaps" 25 * FIXME support other archs 26 */ 27 #undef NDEBUG 28 #include <assert.h> 29 #include <errno.h> 30 #include <sched.h> 31 #include <signal.h> 32 #include <stdbool.h> 33 #include <stdint.h> 34 #include <stdio.h> 35 #include <string.h> 36 #include <stdlib.h> 37 #include <sys/mount.h> 38 #include <sys/types.h> 39 #include <sys/stat.h> 40 #include <sys/wait.h> 41 #include <fcntl.h> 42 #include <unistd.h> 43 #include <sys/syscall.h> 44 #include <sys/uio.h> 45 #include <linux/kdev_t.h> 46 #include <sys/time.h> 47 #include <sys/resource.h> 48 #include <linux/fs.h> 49 50 #ifndef __maybe_unused 51 #define __maybe_unused __attribute__((__unused__)) 52 #endif 53 54 #include "../kselftest.h" 55 56 static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags) 57 { 58 return syscall(SYS_execveat, dirfd, pathname, argv, envp, flags); 59 } 60 61 static void make_private_tmp(void) 62 { 63 if (unshare(CLONE_NEWNS) == -1) { 64 if (errno == ENOSYS || errno == EPERM) { 65 exit(4); 66 } 67 exit(1); 68 } 69 if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1) { 70 exit(1); 71 } 72 if (mount(NULL, "/tmp", "tmpfs", 0, NULL) == -1) { 73 exit(1); 74 } 75 } 76 77 static pid_t pid = -1; 78 static void ate(void) 79 { 80 if (pid > 0) { 81 kill(pid, SIGTERM); 82 } 83 } 84 85 struct elf64_hdr { 86 uint8_t e_ident[16]; 87 uint16_t e_type; 88 uint16_t e_machine; 89 uint32_t e_version; 90 uint64_t e_entry; 91 uint64_t e_phoff; 92 uint64_t e_shoff; 93 uint32_t e_flags; 94 uint16_t e_ehsize; 95 uint16_t e_phentsize; 96 uint16_t e_phnum; 97 uint16_t e_shentsize; 98 uint16_t e_shnum; 99 uint16_t e_shstrndx; 100 }; 101 102 struct elf64_phdr { 103 uint32_t p_type; 104 uint32_t p_flags; 105 uint64_t p_offset; 106 uint64_t p_vaddr; 107 uint64_t p_paddr; 108 uint64_t p_filesz; 109 uint64_t p_memsz; 110 uint64_t p_align; 111 }; 112 113 #ifdef __x86_64__ 114 #define PAGE_SIZE 4096 115 #define VADDR (1UL << 32) 116 #define MAPS_OFFSET 73 117 118 #define syscall 0x0f, 0x05 119 #define mov_rdi(x) \ 120 0x48, 0xbf, \ 121 (x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff, \ 122 ((x)>>32)&0xff, ((x)>>40)&0xff, ((x)>>48)&0xff, ((x)>>56)&0xff 123 124 #define mov_rsi(x) \ 125 0x48, 0xbe, \ 126 (x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff, \ 127 ((x)>>32)&0xff, ((x)>>40)&0xff, ((x)>>48)&0xff, ((x)>>56)&0xff 128 129 #define mov_eax(x) \ 130 0xb8, (x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff 131 132 static const uint8_t payload[] = { 133 /* Casually unmap stack, vDSO and everything else. */ 134 /* munmap */ 135 mov_rdi(VADDR + 4096), 136 mov_rsi((1ULL << 47) - 4096 - VADDR - 4096), 137 mov_eax(11), 138 syscall, 139 140 /* Ping parent. */ 141 /* write(0, &c, 1); */ 142 0x31, 0xff, /* xor edi, edi */ 143 0x48, 0x8d, 0x35, 0x00, 0x00, 0x00, 0x00, /* lea rsi, [rip] */ 144 0xba, 0x01, 0x00, 0x00, 0x00, /* mov edx, 1 */ 145 mov_eax(1), 146 syscall, 147 148 /* 1: pause(); */ 149 mov_eax(34), 150 syscall, 151 152 0xeb, 0xf7, /* jmp 1b */ 153 }; 154 155 static int make_exe(const uint8_t *payload, size_t len) 156 { 157 struct elf64_hdr h; 158 struct elf64_phdr ph; 159 160 struct iovec iov[3] = { 161 {&h, sizeof(struct elf64_hdr)}, 162 {&ph, sizeof(struct elf64_phdr)}, 163 {(void *)payload, len}, 164 }; 165 int fd, fd1; 166 char buf[64]; 167 168 memset(&h, 0, sizeof(h)); 169 h.e_ident[0] = 0x7f; 170 h.e_ident[1] = 'E'; 171 h.e_ident[2] = 'L'; 172 h.e_ident[3] = 'F'; 173 h.e_ident[4] = 2; 174 h.e_ident[5] = 1; 175 h.e_ident[6] = 1; 176 h.e_ident[7] = 0; 177 h.e_type = 2; 178 h.e_machine = 0x3e; 179 h.e_version = 1; 180 h.e_entry = VADDR + sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr); 181 h.e_phoff = sizeof(struct elf64_hdr); 182 h.e_shoff = 0; 183 h.e_flags = 0; 184 h.e_ehsize = sizeof(struct elf64_hdr); 185 h.e_phentsize = sizeof(struct elf64_phdr); 186 h.e_phnum = 1; 187 h.e_shentsize = 0; 188 h.e_shnum = 0; 189 h.e_shstrndx = 0; 190 191 memset(&ph, 0, sizeof(ph)); 192 ph.p_type = 1; 193 ph.p_flags = (1<<2)|1; 194 ph.p_offset = 0; 195 ph.p_vaddr = VADDR; 196 ph.p_paddr = 0; 197 ph.p_filesz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len; 198 ph.p_memsz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len; 199 ph.p_align = 4096; 200 201 fd = openat(AT_FDCWD, "/tmp", O_WRONLY|O_EXCL|O_TMPFILE, 0700); 202 if (fd == -1) { 203 exit(1); 204 } 205 206 if (writev(fd, iov, 3) != sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len) { 207 exit(1); 208 } 209 210 /* Avoid ETXTBSY on exec. */ 211 snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd); 212 fd1 = open(buf, O_RDONLY|O_CLOEXEC); 213 close(fd); 214 215 return fd1; 216 } 217 #endif 218 219 /* 220 * 0: vsyscall VMA doesn't exist vsyscall=none 221 * 1: vsyscall VMA is --xp vsyscall=xonly 222 * 2: vsyscall VMA is r-xp vsyscall=emulate 223 */ 224 static volatile int g_vsyscall; 225 static const char *str_vsyscall __maybe_unused; 226 227 static const char str_vsyscall_0[] __maybe_unused = ""; 228 static const char str_vsyscall_1[] __maybe_unused = 229 "ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n"; 230 static const char str_vsyscall_2[] __maybe_unused = 231 "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n"; 232 233 #ifdef __x86_64__ 234 static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___) 235 { 236 _exit(g_vsyscall); 237 } 238 239 /* 240 * vsyscall page can't be unmapped, probe it directly. 241 */ 242 static void vsyscall(void) 243 { 244 pid_t pid; 245 int wstatus; 246 247 pid = fork(); 248 if (pid < 0) { 249 fprintf(stderr, "fork, errno %d\n", errno); 250 exit(1); 251 } 252 if (pid == 0) { 253 struct rlimit rlim = {0, 0}; 254 (void)setrlimit(RLIMIT_CORE, &rlim); 255 256 /* Hide "segfault at ffffffffff600000" messages. */ 257 struct sigaction act; 258 memset(&act, 0, sizeof(struct sigaction)); 259 act.sa_flags = SA_SIGINFO; 260 act.sa_sigaction = sigaction_SIGSEGV; 261 (void)sigaction(SIGSEGV, &act, NULL); 262 263 g_vsyscall = 0; 264 /* gettimeofday(NULL, NULL); */ 265 uint64_t rax = 0xffffffffff600000; 266 asm volatile ( 267 "call *%[rax]" 268 : [rax] "+a" (rax) 269 : "D" (NULL), "S" (NULL) 270 : "rcx", "r11" 271 ); 272 273 g_vsyscall = 1; 274 *(volatile int *)0xffffffffff600000UL; 275 276 g_vsyscall = 2; 277 exit(g_vsyscall); 278 } 279 waitpid(pid, &wstatus, 0); 280 if (WIFEXITED(wstatus)) { 281 g_vsyscall = WEXITSTATUS(wstatus); 282 } else { 283 fprintf(stderr, "error: wstatus %08x\n", wstatus); 284 exit(1); 285 } 286 } 287 288 int main(void) 289 { 290 int pipefd[2]; 291 int exec_fd; 292 293 vsyscall(); 294 switch (g_vsyscall) { 295 case 0: 296 str_vsyscall = str_vsyscall_0; 297 break; 298 case 1: 299 str_vsyscall = str_vsyscall_1; 300 break; 301 case 2: 302 str_vsyscall = str_vsyscall_2; 303 break; 304 default: 305 abort(); 306 } 307 308 atexit(ate); 309 310 make_private_tmp(); 311 312 /* Reserve fd 0 for 1-byte pipe ping from child. */ 313 close(0); 314 if (open("/", O_RDONLY|O_DIRECTORY|O_PATH) != 0) { 315 return 1; 316 } 317 318 exec_fd = make_exe(payload, sizeof(payload)); 319 320 if (pipe(pipefd) == -1) { 321 return 1; 322 } 323 if (dup2(pipefd[1], 0) != 0) { 324 return 1; 325 } 326 327 pid = fork(); 328 if (pid == -1) { 329 return 1; 330 } 331 if (pid == 0) { 332 sys_execveat(exec_fd, "", NULL, NULL, AT_EMPTY_PATH); 333 return 1; 334 } 335 336 char _; 337 if (read(pipefd[0], &_, 1) != 1) { 338 return 1; 339 } 340 341 struct stat st; 342 if (fstat(exec_fd, &st) == -1) { 343 return 1; 344 } 345 346 /* Generate "head -n1 /proc/$PID/maps" */ 347 char buf0[256]; 348 memset(buf0, ' ', sizeof(buf0)); 349 int len = snprintf(buf0, sizeof(buf0), 350 "%08lx-%08lx r-xp 00000000 %02lx:%02lx %llu", 351 VADDR, VADDR + PAGE_SIZE, 352 MAJOR(st.st_dev), MINOR(st.st_dev), 353 (unsigned long long)st.st_ino); 354 buf0[len] = ' '; 355 snprintf(buf0 + MAPS_OFFSET, sizeof(buf0) - MAPS_OFFSET, 356 "/tmp/#%llu (deleted)\n", (unsigned long long)st.st_ino); 357 358 /* Test /proc/$PID/maps */ 359 { 360 const size_t len = strlen(buf0) + strlen(str_vsyscall); 361 char buf[256]; 362 ssize_t rv; 363 int fd; 364 365 snprintf(buf, sizeof(buf), "/proc/%u/maps", pid); 366 fd = open(buf, O_RDONLY); 367 if (fd == -1) { 368 return 1; 369 } 370 rv = read(fd, buf, sizeof(buf)); 371 assert(rv == len); 372 assert(memcmp(buf, buf0, strlen(buf0)) == 0); 373 if (g_vsyscall > 0) { 374 assert(memcmp(buf + strlen(buf0), str_vsyscall, strlen(str_vsyscall)) == 0); 375 } 376 } 377 378 /* Test /proc/$PID/smaps */ 379 { 380 char buf[4096]; 381 ssize_t rv; 382 int fd; 383 384 snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid); 385 fd = open(buf, O_RDONLY); 386 if (fd == -1) { 387 return 1; 388 } 389 rv = read(fd, buf, sizeof(buf)); 390 assert(0 <= rv && rv <= sizeof(buf)); 391 392 assert(rv >= strlen(buf0)); 393 assert(memcmp(buf, buf0, strlen(buf0)) == 0); 394 395 #define RSS1 "Rss: 4 kB\n" 396 #define RSS2 "Rss: 0 kB\n" 397 #define PSS1 "Pss: 4 kB\n" 398 #define PSS2 "Pss: 0 kB\n" 399 assert(memmem(buf, rv, RSS1, strlen(RSS1)) || 400 memmem(buf, rv, RSS2, strlen(RSS2))); 401 assert(memmem(buf, rv, PSS1, strlen(PSS1)) || 402 memmem(buf, rv, PSS2, strlen(PSS2))); 403 404 static const char *S[] = { 405 "Size: 4 kB\n", 406 "KernelPageSize: 4 kB\n", 407 "MMUPageSize: 4 kB\n", 408 "Anonymous: 0 kB\n", 409 "AnonHugePages: 0 kB\n", 410 "Shared_Hugetlb: 0 kB\n", 411 "Private_Hugetlb: 0 kB\n", 412 "Locked: 0 kB\n", 413 }; 414 int i; 415 416 for (i = 0; i < ARRAY_SIZE(S); i++) { 417 assert(memmem(buf, rv, S[i], strlen(S[i]))); 418 } 419 420 if (g_vsyscall > 0) { 421 assert(memmem(buf, rv, str_vsyscall, strlen(str_vsyscall))); 422 } 423 } 424 425 /* Test /proc/$PID/smaps_rollup */ 426 { 427 char bufr[256]; 428 memset(bufr, ' ', sizeof(bufr)); 429 len = snprintf(bufr, sizeof(bufr), 430 "%08lx-%08lx ---p 00000000 00:00 0", 431 VADDR, VADDR + PAGE_SIZE); 432 bufr[len] = ' '; 433 snprintf(bufr + MAPS_OFFSET, sizeof(bufr) - MAPS_OFFSET, 434 "[rollup]\n"); 435 436 char buf[1024]; 437 ssize_t rv; 438 int fd; 439 440 snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid); 441 fd = open(buf, O_RDONLY); 442 if (fd == -1) { 443 return 1; 444 } 445 rv = read(fd, buf, sizeof(buf)); 446 assert(0 <= rv && rv <= sizeof(buf)); 447 448 assert(rv >= strlen(bufr)); 449 assert(memcmp(buf, bufr, strlen(bufr)) == 0); 450 451 assert(memmem(buf, rv, RSS1, strlen(RSS1)) || 452 memmem(buf, rv, RSS2, strlen(RSS2))); 453 assert(memmem(buf, rv, PSS1, strlen(PSS1)) || 454 memmem(buf, rv, PSS2, strlen(PSS2))); 455 456 static const char *S[] = { 457 "Anonymous: 0 kB\n", 458 "AnonHugePages: 0 kB\n", 459 "Shared_Hugetlb: 0 kB\n", 460 "Private_Hugetlb: 0 kB\n", 461 "Locked: 0 kB\n", 462 }; 463 int i; 464 465 for (i = 0; i < ARRAY_SIZE(S); i++) { 466 assert(memmem(buf, rv, S[i], strlen(S[i]))); 467 } 468 } 469 470 /* Test /proc/$PID/statm */ 471 { 472 char buf[64]; 473 ssize_t rv; 474 int fd; 475 476 snprintf(buf, sizeof(buf), "/proc/%u/statm", pid); 477 fd = open(buf, O_RDONLY); 478 if (fd == -1) { 479 return 1; 480 } 481 rv = read(fd, buf, sizeof(buf)); 482 assert(rv == 7 * 2); 483 484 assert(buf[0] == '1'); /* ->total_vm */ 485 assert(buf[1] == ' '); 486 assert(buf[2] == '0' || buf[2] == '1'); /* rss */ 487 assert(buf[3] == ' '); 488 assert(buf[4] == '0' || buf[2] == '1'); /* file rss */ 489 assert(buf[5] == ' '); 490 assert(buf[6] == '1'); /* ELF executable segments */ 491 assert(buf[7] == ' '); 492 assert(buf[8] == '0'); 493 assert(buf[9] == ' '); 494 assert(buf[10] == '0'); /* ->data_vm + ->stack_vm */ 495 assert(buf[11] == ' '); 496 assert(buf[12] == '0'); 497 assert(buf[13] == '\n'); 498 } 499 500 /* Test PROCMAP_QUERY ioctl() for /proc/$PID/maps */ 501 { 502 char path_buf[256], exp_path_buf[256]; 503 struct procmap_query q; 504 int fd, err; 505 506 snprintf(path_buf, sizeof(path_buf), "/proc/%u/maps", pid); 507 fd = open(path_buf, O_RDONLY); 508 if (fd == -1) 509 return 1; 510 511 /* CASE 1: exact MATCH at VADDR */ 512 memset(&q, 0, sizeof(q)); 513 q.size = sizeof(q); 514 q.query_addr = VADDR; 515 q.query_flags = 0; 516 q.vma_name_addr = (__u64)(unsigned long)path_buf; 517 q.vma_name_size = sizeof(path_buf); 518 519 err = ioctl(fd, PROCMAP_QUERY, &q); 520 assert(err == 0); 521 522 assert(q.query_addr == VADDR); 523 assert(q.query_flags == 0); 524 525 assert(q.vma_flags == (PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_EXECUTABLE)); 526 assert(q.vma_start == VADDR); 527 assert(q.vma_end == VADDR + PAGE_SIZE); 528 assert(q.vma_page_size == PAGE_SIZE); 529 530 assert(q.vma_offset == 0); 531 assert(q.inode == st.st_ino); 532 assert(q.dev_major == MAJOR(st.st_dev)); 533 assert(q.dev_minor == MINOR(st.st_dev)); 534 535 snprintf(exp_path_buf, sizeof(exp_path_buf), 536 "/tmp/#%llu (deleted)", (unsigned long long)st.st_ino); 537 assert(q.vma_name_size == strlen(exp_path_buf) + 1); 538 assert(strcmp(path_buf, exp_path_buf) == 0); 539 540 /* CASE 2: NO MATCH at VADDR-1 */ 541 memset(&q, 0, sizeof(q)); 542 q.size = sizeof(q); 543 q.query_addr = VADDR - 1; 544 q.query_flags = 0; /* exact match */ 545 546 err = ioctl(fd, PROCMAP_QUERY, &q); 547 err = err < 0 ? -errno : 0; 548 assert(err == -ENOENT); 549 550 /* CASE 3: MATCH COVERING_OR_NEXT_VMA at VADDR - 1 */ 551 memset(&q, 0, sizeof(q)); 552 q.size = sizeof(q); 553 q.query_addr = VADDR - 1; 554 q.query_flags = PROCMAP_QUERY_COVERING_OR_NEXT_VMA; 555 556 err = ioctl(fd, PROCMAP_QUERY, &q); 557 assert(err == 0); 558 559 assert(q.query_addr == VADDR - 1); 560 assert(q.query_flags == PROCMAP_QUERY_COVERING_OR_NEXT_VMA); 561 assert(q.vma_start == VADDR); 562 assert(q.vma_end == VADDR + PAGE_SIZE); 563 564 /* CASE 4: NO MATCH at VADDR + PAGE_SIZE */ 565 memset(&q, 0, sizeof(q)); 566 q.size = sizeof(q); 567 q.query_addr = VADDR + PAGE_SIZE; /* point right after the VMA */ 568 q.query_flags = PROCMAP_QUERY_COVERING_OR_NEXT_VMA; 569 570 err = ioctl(fd, PROCMAP_QUERY, &q); 571 err = err < 0 ? -errno : 0; 572 assert(err == -ENOENT); 573 574 /* CASE 5: NO MATCH WRITABLE at VADDR */ 575 memset(&q, 0, sizeof(q)); 576 q.size = sizeof(q); 577 q.query_addr = VADDR; 578 q.query_flags = PROCMAP_QUERY_VMA_WRITABLE; 579 580 err = ioctl(fd, PROCMAP_QUERY, &q); 581 err = err < 0 ? -errno : 0; 582 assert(err == -ENOENT); 583 } 584 585 return 0; 586 } 587 #else 588 int main(void) 589 { 590 return 4; 591 } 592 #endif 593