1 /* 2 * Copyright (c) 2019 Alexey Dobriyan <adobriyan@gmail.com> 3 * 4 * Permission to use, copy, modify, and distribute this software for any 5 * purpose with or without fee is hereby granted, provided that the above 6 * copyright notice and this permission notice appear in all copies. 7 * 8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 */ 16 /* 17 * Fork and exec tiny 1 page executable which precisely controls its VM. 18 * Test /proc/$PID/maps 19 * Test /proc/$PID/smaps 20 * Test /proc/$PID/smaps_rollup 21 * Test /proc/$PID/statm 22 * 23 * FIXME require CONFIG_TMPFS which can be disabled 24 * FIXME test other values from "smaps" 25 * FIXME support other archs 26 */ 27 #undef NDEBUG 28 #include <assert.h> 29 #include <errno.h> 30 #include <sched.h> 31 #include <signal.h> 32 #include <stdbool.h> 33 #include <stdint.h> 34 #include <stdio.h> 35 #include <string.h> 36 #include <stdlib.h> 37 #include <sys/mount.h> 38 #include <sys/types.h> 39 #include <sys/stat.h> 40 #include <sys/wait.h> 41 #include <fcntl.h> 42 #include <unistd.h> 43 #include <sys/syscall.h> 44 #include <sys/uio.h> 45 #include <linux/kdev_t.h> 46 #include <sys/time.h> 47 #include <sys/resource.h> 48 #include <linux/fs.h> 49 50 #include "../kselftest.h" 51 52 static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags) 53 { 54 return syscall(SYS_execveat, dirfd, pathname, argv, envp, flags); 55 } 56 57 static void make_private_tmp(void) 58 { 59 if (unshare(CLONE_NEWNS) == -1) { 60 if (errno == ENOSYS || errno == EPERM) { 61 exit(4); 62 } 63 exit(1); 64 } 65 if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1) { 66 exit(1); 67 } 68 if (mount(NULL, "/tmp", "tmpfs", 0, NULL) == -1) { 69 exit(1); 70 } 71 } 72 73 static pid_t pid = -1; 74 static void ate(void) 75 { 76 if (pid > 0) { 77 kill(pid, SIGTERM); 78 } 79 } 80 81 struct elf64_hdr { 82 uint8_t e_ident[16]; 83 uint16_t e_type; 84 uint16_t e_machine; 85 uint32_t e_version; 86 uint64_t e_entry; 87 uint64_t e_phoff; 88 uint64_t e_shoff; 89 uint32_t e_flags; 90 uint16_t e_ehsize; 91 uint16_t e_phentsize; 92 uint16_t e_phnum; 93 uint16_t e_shentsize; 94 uint16_t e_shnum; 95 uint16_t e_shstrndx; 96 }; 97 98 struct elf64_phdr { 99 uint32_t p_type; 100 uint32_t p_flags; 101 uint64_t p_offset; 102 uint64_t p_vaddr; 103 uint64_t p_paddr; 104 uint64_t p_filesz; 105 uint64_t p_memsz; 106 uint64_t p_align; 107 }; 108 109 #ifdef __x86_64__ 110 #define PAGE_SIZE 4096 111 #define VADDR (1UL << 32) 112 #define MAPS_OFFSET 73 113 114 #define syscall 0x0f, 0x05 115 #define mov_rdi(x) \ 116 0x48, 0xbf, \ 117 (x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff, \ 118 ((x)>>32)&0xff, ((x)>>40)&0xff, ((x)>>48)&0xff, ((x)>>56)&0xff 119 120 #define mov_rsi(x) \ 121 0x48, 0xbe, \ 122 (x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff, \ 123 ((x)>>32)&0xff, ((x)>>40)&0xff, ((x)>>48)&0xff, ((x)>>56)&0xff 124 125 #define mov_eax(x) \ 126 0xb8, (x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff 127 128 static const uint8_t payload[] = { 129 /* Casually unmap stack, vDSO and everything else. */ 130 /* munmap */ 131 mov_rdi(VADDR + 4096), 132 mov_rsi((1ULL << 47) - 4096 - VADDR - 4096), 133 mov_eax(11), 134 syscall, 135 136 /* Ping parent. */ 137 /* write(0, &c, 1); */ 138 0x31, 0xff, /* xor edi, edi */ 139 0x48, 0x8d, 0x35, 0x00, 0x00, 0x00, 0x00, /* lea rsi, [rip] */ 140 0xba, 0x01, 0x00, 0x00, 0x00, /* mov edx, 1 */ 141 mov_eax(1), 142 syscall, 143 144 /* 1: pause(); */ 145 mov_eax(34), 146 syscall, 147 148 0xeb, 0xf7, /* jmp 1b */ 149 }; 150 151 static int make_exe(const uint8_t *payload, size_t len) 152 { 153 struct elf64_hdr h; 154 struct elf64_phdr ph; 155 156 struct iovec iov[3] = { 157 {&h, sizeof(struct elf64_hdr)}, 158 {&ph, sizeof(struct elf64_phdr)}, 159 {(void *)payload, len}, 160 }; 161 int fd, fd1; 162 char buf[64]; 163 164 memset(&h, 0, sizeof(h)); 165 h.e_ident[0] = 0x7f; 166 h.e_ident[1] = 'E'; 167 h.e_ident[2] = 'L'; 168 h.e_ident[3] = 'F'; 169 h.e_ident[4] = 2; 170 h.e_ident[5] = 1; 171 h.e_ident[6] = 1; 172 h.e_ident[7] = 0; 173 h.e_type = 2; 174 h.e_machine = 0x3e; 175 h.e_version = 1; 176 h.e_entry = VADDR + sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr); 177 h.e_phoff = sizeof(struct elf64_hdr); 178 h.e_shoff = 0; 179 h.e_flags = 0; 180 h.e_ehsize = sizeof(struct elf64_hdr); 181 h.e_phentsize = sizeof(struct elf64_phdr); 182 h.e_phnum = 1; 183 h.e_shentsize = 0; 184 h.e_shnum = 0; 185 h.e_shstrndx = 0; 186 187 memset(&ph, 0, sizeof(ph)); 188 ph.p_type = 1; 189 ph.p_flags = (1<<2)|1; 190 ph.p_offset = 0; 191 ph.p_vaddr = VADDR; 192 ph.p_paddr = 0; 193 ph.p_filesz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len; 194 ph.p_memsz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len; 195 ph.p_align = 4096; 196 197 fd = openat(AT_FDCWD, "/tmp", O_WRONLY|O_EXCL|O_TMPFILE, 0700); 198 if (fd == -1) { 199 exit(1); 200 } 201 202 if (writev(fd, iov, 3) != sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len) { 203 exit(1); 204 } 205 206 /* Avoid ETXTBSY on exec. */ 207 snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd); 208 fd1 = open(buf, O_RDONLY|O_CLOEXEC); 209 close(fd); 210 211 return fd1; 212 } 213 #endif 214 215 /* 216 * 0: vsyscall VMA doesn't exist vsyscall=none 217 * 1: vsyscall VMA is --xp vsyscall=xonly 218 * 2: vsyscall VMA is r-xp vsyscall=emulate 219 */ 220 static volatile int g_vsyscall; 221 static const char *str_vsyscall; 222 223 static const char str_vsyscall_0[] = ""; 224 static const char str_vsyscall_1[] = 225 "ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n"; 226 static const char str_vsyscall_2[] = 227 "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n"; 228 229 #ifdef __x86_64__ 230 static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___) 231 { 232 _exit(g_vsyscall); 233 } 234 235 /* 236 * vsyscall page can't be unmapped, probe it directly. 237 */ 238 static void vsyscall(void) 239 { 240 pid_t pid; 241 int wstatus; 242 243 pid = fork(); 244 if (pid < 0) { 245 fprintf(stderr, "fork, errno %d\n", errno); 246 exit(1); 247 } 248 if (pid == 0) { 249 struct rlimit rlim = {0, 0}; 250 (void)setrlimit(RLIMIT_CORE, &rlim); 251 252 /* Hide "segfault at ffffffffff600000" messages. */ 253 struct sigaction act; 254 memset(&act, 0, sizeof(struct sigaction)); 255 act.sa_flags = SA_SIGINFO; 256 act.sa_sigaction = sigaction_SIGSEGV; 257 (void)sigaction(SIGSEGV, &act, NULL); 258 259 g_vsyscall = 0; 260 /* gettimeofday(NULL, NULL); */ 261 uint64_t rax = 0xffffffffff600000; 262 asm volatile ( 263 "call *%[rax]" 264 : [rax] "+a" (rax) 265 : "D" (NULL), "S" (NULL) 266 : "rcx", "r11" 267 ); 268 269 g_vsyscall = 1; 270 *(volatile int *)0xffffffffff600000UL; 271 272 g_vsyscall = 2; 273 exit(g_vsyscall); 274 } 275 waitpid(pid, &wstatus, 0); 276 if (WIFEXITED(wstatus)) { 277 g_vsyscall = WEXITSTATUS(wstatus); 278 } else { 279 fprintf(stderr, "error: wstatus %08x\n", wstatus); 280 exit(1); 281 } 282 } 283 284 int main(void) 285 { 286 int pipefd[2]; 287 int exec_fd; 288 289 vsyscall(); 290 switch (g_vsyscall) { 291 case 0: 292 str_vsyscall = str_vsyscall_0; 293 break; 294 case 1: 295 str_vsyscall = str_vsyscall_1; 296 break; 297 case 2: 298 str_vsyscall = str_vsyscall_2; 299 break; 300 default: 301 abort(); 302 } 303 304 atexit(ate); 305 306 make_private_tmp(); 307 308 /* Reserve fd 0 for 1-byte pipe ping from child. */ 309 close(0); 310 if (open("/", O_RDONLY|O_DIRECTORY|O_PATH) != 0) { 311 return 1; 312 } 313 314 exec_fd = make_exe(payload, sizeof(payload)); 315 316 if (pipe(pipefd) == -1) { 317 return 1; 318 } 319 if (dup2(pipefd[1], 0) != 0) { 320 return 1; 321 } 322 323 pid = fork(); 324 if (pid == -1) { 325 return 1; 326 } 327 if (pid == 0) { 328 sys_execveat(exec_fd, "", NULL, NULL, AT_EMPTY_PATH); 329 return 1; 330 } 331 332 char _; 333 if (read(pipefd[0], &_, 1) != 1) { 334 return 1; 335 } 336 337 struct stat st; 338 if (fstat(exec_fd, &st) == -1) { 339 return 1; 340 } 341 342 /* Generate "head -n1 /proc/$PID/maps" */ 343 char buf0[256]; 344 memset(buf0, ' ', sizeof(buf0)); 345 int len = snprintf(buf0, sizeof(buf0), 346 "%08lx-%08lx r-xp 00000000 %02lx:%02lx %llu", 347 VADDR, VADDR + PAGE_SIZE, 348 MAJOR(st.st_dev), MINOR(st.st_dev), 349 (unsigned long long)st.st_ino); 350 buf0[len] = ' '; 351 snprintf(buf0 + MAPS_OFFSET, sizeof(buf0) - MAPS_OFFSET, 352 "/tmp/#%llu (deleted)\n", (unsigned long long)st.st_ino); 353 354 /* Test /proc/$PID/maps */ 355 { 356 const size_t len = strlen(buf0) + strlen(str_vsyscall); 357 char buf[256]; 358 ssize_t rv; 359 int fd; 360 361 snprintf(buf, sizeof(buf), "/proc/%u/maps", pid); 362 fd = open(buf, O_RDONLY); 363 if (fd == -1) { 364 return 1; 365 } 366 rv = read(fd, buf, sizeof(buf)); 367 assert(rv == len); 368 assert(memcmp(buf, buf0, strlen(buf0)) == 0); 369 if (g_vsyscall > 0) { 370 assert(memcmp(buf + strlen(buf0), str_vsyscall, strlen(str_vsyscall)) == 0); 371 } 372 } 373 374 /* Test /proc/$PID/smaps */ 375 { 376 char buf[4096]; 377 ssize_t rv; 378 int fd; 379 380 snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid); 381 fd = open(buf, O_RDONLY); 382 if (fd == -1) { 383 return 1; 384 } 385 rv = read(fd, buf, sizeof(buf)); 386 assert(0 <= rv && rv <= sizeof(buf)); 387 388 assert(rv >= strlen(buf0)); 389 assert(memcmp(buf, buf0, strlen(buf0)) == 0); 390 391 #define RSS1 "Rss: 4 kB\n" 392 #define RSS2 "Rss: 0 kB\n" 393 #define PSS1 "Pss: 4 kB\n" 394 #define PSS2 "Pss: 0 kB\n" 395 assert(memmem(buf, rv, RSS1, strlen(RSS1)) || 396 memmem(buf, rv, RSS2, strlen(RSS2))); 397 assert(memmem(buf, rv, PSS1, strlen(PSS1)) || 398 memmem(buf, rv, PSS2, strlen(PSS2))); 399 400 static const char *S[] = { 401 "Size: 4 kB\n", 402 "KernelPageSize: 4 kB\n", 403 "MMUPageSize: 4 kB\n", 404 "Anonymous: 0 kB\n", 405 "AnonHugePages: 0 kB\n", 406 "Shared_Hugetlb: 0 kB\n", 407 "Private_Hugetlb: 0 kB\n", 408 "Locked: 0 kB\n", 409 }; 410 int i; 411 412 for (i = 0; i < ARRAY_SIZE(S); i++) { 413 assert(memmem(buf, rv, S[i], strlen(S[i]))); 414 } 415 416 if (g_vsyscall > 0) { 417 assert(memmem(buf, rv, str_vsyscall, strlen(str_vsyscall))); 418 } 419 } 420 421 /* Test /proc/$PID/smaps_rollup */ 422 { 423 char bufr[256]; 424 memset(bufr, ' ', sizeof(bufr)); 425 len = snprintf(bufr, sizeof(bufr), 426 "%08lx-%08lx ---p 00000000 00:00 0", 427 VADDR, VADDR + PAGE_SIZE); 428 bufr[len] = ' '; 429 snprintf(bufr + MAPS_OFFSET, sizeof(bufr) - MAPS_OFFSET, 430 "[rollup]\n"); 431 432 char buf[1024]; 433 ssize_t rv; 434 int fd; 435 436 snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid); 437 fd = open(buf, O_RDONLY); 438 if (fd == -1) { 439 return 1; 440 } 441 rv = read(fd, buf, sizeof(buf)); 442 assert(0 <= rv && rv <= sizeof(buf)); 443 444 assert(rv >= strlen(bufr)); 445 assert(memcmp(buf, bufr, strlen(bufr)) == 0); 446 447 assert(memmem(buf, rv, RSS1, strlen(RSS1)) || 448 memmem(buf, rv, RSS2, strlen(RSS2))); 449 assert(memmem(buf, rv, PSS1, strlen(PSS1)) || 450 memmem(buf, rv, PSS2, strlen(PSS2))); 451 452 static const char *S[] = { 453 "Anonymous: 0 kB\n", 454 "AnonHugePages: 0 kB\n", 455 "Shared_Hugetlb: 0 kB\n", 456 "Private_Hugetlb: 0 kB\n", 457 "Locked: 0 kB\n", 458 }; 459 int i; 460 461 for (i = 0; i < ARRAY_SIZE(S); i++) { 462 assert(memmem(buf, rv, S[i], strlen(S[i]))); 463 } 464 } 465 466 /* Test /proc/$PID/statm */ 467 { 468 char buf[64]; 469 ssize_t rv; 470 int fd; 471 472 snprintf(buf, sizeof(buf), "/proc/%u/statm", pid); 473 fd = open(buf, O_RDONLY); 474 if (fd == -1) { 475 return 1; 476 } 477 rv = read(fd, buf, sizeof(buf)); 478 assert(rv == 7 * 2); 479 480 assert(buf[0] == '1'); /* ->total_vm */ 481 assert(buf[1] == ' '); 482 assert(buf[2] == '0' || buf[2] == '1'); /* rss */ 483 assert(buf[3] == ' '); 484 assert(buf[4] == '0' || buf[2] == '1'); /* file rss */ 485 assert(buf[5] == ' '); 486 assert(buf[6] == '1'); /* ELF executable segments */ 487 assert(buf[7] == ' '); 488 assert(buf[8] == '0'); 489 assert(buf[9] == ' '); 490 assert(buf[10] == '0'); /* ->data_vm + ->stack_vm */ 491 assert(buf[11] == ' '); 492 assert(buf[12] == '0'); 493 assert(buf[13] == '\n'); 494 } 495 496 /* Test PROCMAP_QUERY ioctl() for /proc/$PID/maps */ 497 { 498 char path_buf[256], exp_path_buf[256]; 499 struct procmap_query q; 500 int fd, err; 501 502 snprintf(path_buf, sizeof(path_buf), "/proc/%u/maps", pid); 503 fd = open(path_buf, O_RDONLY); 504 if (fd == -1) 505 return 1; 506 507 /* CASE 1: exact MATCH at VADDR */ 508 memset(&q, 0, sizeof(q)); 509 q.size = sizeof(q); 510 q.query_addr = VADDR; 511 q.query_flags = 0; 512 q.vma_name_addr = (__u64)(unsigned long)path_buf; 513 q.vma_name_size = sizeof(path_buf); 514 515 err = ioctl(fd, PROCMAP_QUERY, &q); 516 assert(err == 0); 517 518 assert(q.query_addr == VADDR); 519 assert(q.query_flags == 0); 520 521 assert(q.vma_flags == (PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_EXECUTABLE)); 522 assert(q.vma_start == VADDR); 523 assert(q.vma_end == VADDR + PAGE_SIZE); 524 assert(q.vma_page_size == PAGE_SIZE); 525 526 assert(q.vma_offset == 0); 527 assert(q.inode == st.st_ino); 528 assert(q.dev_major == MAJOR(st.st_dev)); 529 assert(q.dev_minor == MINOR(st.st_dev)); 530 531 snprintf(exp_path_buf, sizeof(exp_path_buf), 532 "/tmp/#%llu (deleted)", (unsigned long long)st.st_ino); 533 assert(q.vma_name_size == strlen(exp_path_buf) + 1); 534 assert(strcmp(path_buf, exp_path_buf) == 0); 535 536 /* CASE 2: NO MATCH at VADDR-1 */ 537 memset(&q, 0, sizeof(q)); 538 q.size = sizeof(q); 539 q.query_addr = VADDR - 1; 540 q.query_flags = 0; /* exact match */ 541 542 err = ioctl(fd, PROCMAP_QUERY, &q); 543 err = err < 0 ? -errno : 0; 544 assert(err == -ENOENT); 545 546 /* CASE 3: MATCH COVERING_OR_NEXT_VMA at VADDR - 1 */ 547 memset(&q, 0, sizeof(q)); 548 q.size = sizeof(q); 549 q.query_addr = VADDR - 1; 550 q.query_flags = PROCMAP_QUERY_COVERING_OR_NEXT_VMA; 551 552 err = ioctl(fd, PROCMAP_QUERY, &q); 553 assert(err == 0); 554 555 assert(q.query_addr == VADDR - 1); 556 assert(q.query_flags == PROCMAP_QUERY_COVERING_OR_NEXT_VMA); 557 assert(q.vma_start == VADDR); 558 assert(q.vma_end == VADDR + PAGE_SIZE); 559 560 /* CASE 4: NO MATCH at VADDR + PAGE_SIZE */ 561 memset(&q, 0, sizeof(q)); 562 q.size = sizeof(q); 563 q.query_addr = VADDR + PAGE_SIZE; /* point right after the VMA */ 564 q.query_flags = PROCMAP_QUERY_COVERING_OR_NEXT_VMA; 565 566 err = ioctl(fd, PROCMAP_QUERY, &q); 567 err = err < 0 ? -errno : 0; 568 assert(err == -ENOENT); 569 570 /* CASE 5: NO MATCH WRITABLE at VADDR */ 571 memset(&q, 0, sizeof(q)); 572 q.size = sizeof(q); 573 q.query_addr = VADDR; 574 q.query_flags = PROCMAP_QUERY_VMA_WRITABLE; 575 576 err = ioctl(fd, PROCMAP_QUERY, &q); 577 err = err < 0 ? -errno : 0; 578 assert(err == -ENOENT); 579 } 580 581 return 0; 582 } 583 #else 584 int main(void) 585 { 586 return 4; 587 } 588 #endif 589